From fc43570506b941dd0da3672877e0711a4c1cb1b4 Mon Sep 17 00:00:00 2001 From: Stijnvandenbroek Date: Tue, 3 Mar 2026 22:02:25 +0000 Subject: [PATCH] feat: implement linting and testing --- .github/workflows/ci.yml | 84 +++++++++ .gitignore | 3 + .pre-commit-config.yaml | 43 +++++ .prettierignore | 10 + .prettierrc | 19 ++ .sqlfluff | 20 ++ Makefile | 32 +++- data_platform/assets/funda.py | 82 +++------ data_platform/definitions.py | 1 + data_platform/resources.py | 2 +- pyproject.toml | 31 ++++ tests/__init__.py | 0 tests/conftest.py | 35 ++++ tests/test_assets_funda.py | 336 ++++++++++++++++++++++++++++++++++ tests/test_helpers.py | 154 ++++++++++++++++ tests/test_resources.py | 88 +++++++++ 16 files changed, 884 insertions(+), 56 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .prettierignore create mode 100644 .prettierrc create mode 100644 .sqlfluff create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_assets_funda.py create mode 100644 tests/test_helpers.py create mode 100644 tests/test_resources.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..df785a0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,84 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + # ── Lint ──────────────────────────────────────────────────────────────── + lint-python: + name: Ruff (lint + format) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync + + - name: Ruff lint + run: uv run ruff check . + + - name: Ruff format check + run: uv run ruff format --check . + + lint-sql: + name: SQLFluff + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync + + - name: SQLFluff lint + run: uv run sqlfluff lint dbt/models --dialect postgres + + lint-yaml-json-md: + name: Prettier (YAML / JSON / Markdown) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install prettier + run: npm install --global prettier + + - name: Prettier check + run: | + prettier --check \ + "**/*.yml" "**/*.yaml" "**/*.json" "**/*.md" \ + --ignore-path .prettierignore + + # ── Tests ──────────────────────────────────────────────────────────────── + test: + name: Pytest + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync + + - name: Run tests + run: uv run pytest diff --git a/.gitignore b/.gitignore index 8fcc4cc..7a15699 100644 --- a/.gitignore +++ b/.gitignore @@ -23,5 +23,8 @@ dagster_home/logs/ dagster_home/schedule_logs/ dagster_home/compute_logs/ +# Node +node_modules/ + # Docker *.log diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9d7817f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +repos: + # ── Python: ruff (lint + format) ───────────────────────────────────────── + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.9 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + + # ── YAML / JSON / Markdown: prettier ───────────────────────────────────── + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + types_or: [yaml, json, markdown] + exclude: | + (?x)^( + dbt/target/| + dbt/dbt_packages/| + \.venv/ + ) + + # ── SQL: sqlfluff ───────────────────────────────────────────────────────── + - repo: https://github.com/sqlfluff/sqlfluff + rev: 3.4.0 + hooks: + - id: sqlfluff-lint + additional_dependencies: [sqlfluff-templater-dbt, dbt-core, dbt-postgres] + - id: sqlfluff-fix + additional_dependencies: [sqlfluff-templater-dbt, dbt-core, dbt-postgres] + stages: [manual] # only run sqlfluff-fix manually (make lint-fix) + + # ── General hygiene ─────────────────────────────────────────────────────── + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-json + - id: check-merge-conflict + - id: mixed-line-ending + args: [--fix=lf] diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..3132ec1 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,10 @@ +# Generated / build artefacts +dbt/target/ +dbt/dbt_packages/ +.venv/ +dist/ +*.egg-info/ + +# Dagster storage +dagster_home/storage/ +dagster_home/logs/ diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..f902bf2 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,19 @@ +{ + "printWidth": 100, + "proseWrap": "always", + "tabWidth": 2, + "overrides": [ + { + "files": "*.yml", + "options": { "singleQuote": false } + }, + { + "files": "*.yaml", + "options": { "singleQuote": false } + }, + { + "files": "*.json", + "options": { "trailingComma": "none" } + } + ] +} diff --git a/.sqlfluff b/.sqlfluff new file mode 100644 index 0000000..bfff6bb --- /dev/null +++ b/.sqlfluff @@ -0,0 +1,20 @@ +[sqlfluff] +templater = jinja +dialect = postgres +max_line_length = 100 +# Exclude generated/vendor paths +exclude_rules = RF05 # don't require quoted identifiers +output_line_length = 120 + +[sqlfluff:templater:jinja] +# Stub dbt macros so sqlfluff can lint without a live dbt project +[sqlfluff:templater:jinja:macros] +source = "{% macro source(source_name, table_name) %}{{ source_name }}.{{ table_name }}{% endmacro %}" +ref = "{% macro ref(model_name) %}{{ model_name }}{% endmacro %}" + +[sqlfluff:indentation] +indent_unit = space +tab_space_size = 4 + +[sqlfluff:rules:layout.long_lines] +ignore_comment_lines = true diff --git a/Makefile b/Makefile index d43bfc9..9e62300 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,10 @@ .PHONY: help install sync build up down restart logs ps \ build-code reload-code \ dbt-parse dbt-run dbt-test dbt-build dbt-seed dbt-clean dbt-deps dbt-docs \ - dagster-dev lint test clean + dagster-dev \ + lint lint-fix lint-python lint-sql lint-format \ + pre-commit pre-commit-install \ + test clean # Default Docker Compose project containers COMPOSE := docker compose @@ -78,11 +81,34 @@ dbt-docs: ## Generate and serve dbt docs dagster-dev: ## Start Dagster webserver locally for development uv run dagster dev -## —— Quality —————————————————————————————————————————————————— -lint: ## Run ruff linter and formatter check +## —— Quality ────────────────────────────────────────────────── +lint: lint-python lint-sql lint-format ## Run all linters (ruff + sqlfluff + prettier) + +lint-python: ## Ruff lint + format check uv run ruff check . uv run ruff format --check . +lint-sql: ## SQLFluff lint on dbt/models + uv run sqlfluff lint dbt/models --dialect postgres + +lint-format: ## Prettier check (YAML / JSON / Markdown) + npx --yes prettier --check "**/*.yml" "**/*.yaml" "**/*.json" "**/*.md" \ + --ignore-path .prettierignore + +lint-fix: ## Auto-fix all linters (ruff + sqlfluff + prettier) + uv run ruff check --fix . + uv run ruff format . + uv run sqlfluff fix dbt/models --dialect postgres + npx --yes prettier --write "**/*.yml" "**/*.yaml" "**/*.json" "**/*.md" \ + --ignore-path .prettierignore + +## —— Pre-commit ──────────────────────────────────────────────── +pre-commit-install: ## Install pre-commit hooks into .git/hooks + uv run pre-commit install + +pre-commit: ## Run all pre-commit hooks against all files + uv run pre-commit run --all-files + test: ## Run pytest uv run pytest diff --git a/data_platform/assets/funda.py b/data_platform/assets/funda.py index 43aa979..f6b8352 100644 --- a/data_platform/assets/funda.py +++ b/data_platform/assets/funda.py @@ -9,8 +9,6 @@ parameters (location, price range, etc.) can be tweaked per run. """ import json -from datetime import datetime, timezone -from typing import Optional from dagster import ( AssetExecutionContext, @@ -19,6 +17,7 @@ from dagster import ( MetadataValue, asset, ) +from sqlalchemy import text from data_platform.resources import FundaResource, PostgresResource @@ -32,15 +31,15 @@ class FundaSearchConfig(Config): location: str = "amsterdam" offering_type: str = "buy" - price_min: Optional[int] = None - price_max: Optional[int] = None - area_min: Optional[int] = None - area_max: Optional[int] = None - plot_min: Optional[int] = None - plot_max: Optional[int] = None - object_type: Optional[str] = None # comma-separated, e.g. "house,apartment" - energy_label: Optional[str] = None # comma-separated, e.g. "A,A+,A++" - radius_km: Optional[int] = None + price_min: int | None = None + price_max: int | None = None + area_min: int | None = None + area_max: int | None = None + plot_min: int | None = None + plot_max: int | None = None + object_type: str | None = None # comma-separated, e.g. "house,apartment" + energy_label: str | None = None # comma-separated, e.g. "A,A+,A++" + radius_km: int | None = None sort: str = "newest" max_pages: int = 3 @@ -147,7 +146,7 @@ CREATE TABLE IF NOT EXISTS {_SCHEMA}.price_history ( def _safe(val): """Convert non-serialisable values (tuples, lists of dicts, etc.) for JSONB.""" - if isinstance(val, (list, dict, tuple)): + if isinstance(val, list | dict | tuple): return json.dumps(val, default=str) return val @@ -159,7 +158,10 @@ def _safe_int(val): try: return int(val) except (ValueError, TypeError): - return None + try: + return int(float(val)) + except (ValueError, TypeError): + return None # --------------------------------------------------------------------------- @@ -201,7 +203,7 @@ def funda_search_results( if config.object_type: kwargs["object_type"] = [t.strip() for t in config.object_type.split(",")] if config.energy_label: - kwargs["energy_label"] = [l.strip() for l in config.energy_label.split(",")] + kwargs["energy_label"] = [lbl.strip() for lbl in config.energy_label.split(",")] if config.radius_km is not None: kwargs["radius_km"] = config.radius_km @@ -224,14 +226,10 @@ def funda_search_results( # Write to Postgres engine = postgres.get_engine() with engine.begin() as conn: - conn.execute( - __import__("sqlalchemy").text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}") - ) - conn.execute(__import__("sqlalchemy").text(_DDL_SEARCH)) + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}")) + conn.execute(text(_DDL_SEARCH)) # Truncate before inserting fresh results - conn.execute( - __import__("sqlalchemy").text(f"TRUNCATE TABLE {_SCHEMA}.search_results") - ) + conn.execute(text(f"TRUNCATE TABLE {_SCHEMA}.search_results")) rows = [] for listing in all_listings: @@ -304,18 +302,12 @@ def funda_listing_details( engine = postgres.get_engine() with engine.begin() as conn: - conn.execute( - __import__("sqlalchemy").text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}") - ) - conn.execute(__import__("sqlalchemy").text(_DDL_DETAILS)) + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}")) + conn.execute(text(_DDL_DETAILS)) # Read listing IDs from search results with engine.connect() as conn: - result = conn.execute( - __import__("sqlalchemy").text( - f"SELECT DISTINCT global_id FROM {_SCHEMA}.search_results" - ) - ) + result = conn.execute(text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.search_results")) ids = [row[0] for row in result if row[0]] if not ids: @@ -326,9 +318,7 @@ def funda_listing_details( # Truncate before inserting with engine.begin() as conn: - conn.execute( - __import__("sqlalchemy").text(f"TRUNCATE TABLE {_SCHEMA}.listing_details") - ) + conn.execute(text(f"TRUNCATE TABLE {_SCHEMA}.listing_details")) rows = [] errors = 0 @@ -411,8 +401,7 @@ def funda_listing_details( postgres.execute_many(insert_sql, rows) context.log.info( - f"Inserted {len(rows)} listing details ({errors} errors) " - f"into {_SCHEMA}.listing_details" + f"Inserted {len(rows)} listing details ({errors} errors) into {_SCHEMA}.listing_details" ) return MaterializeResult( @@ -442,33 +431,23 @@ def funda_price_history( engine = postgres.get_engine() with engine.begin() as conn: - conn.execute( - __import__("sqlalchemy").text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}") - ) - conn.execute(__import__("sqlalchemy").text(_DDL_PRICE_HISTORY)) + conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}")) + conn.execute(text(_DDL_PRICE_HISTORY)) # Read listings from details table with engine.connect() as conn: - result = conn.execute( - __import__("sqlalchemy").text( - f"SELECT DISTINCT global_id FROM {_SCHEMA}.listing_details" - ) - ) + result = conn.execute(text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.listing_details")) ids = [row[0] for row in result if row[0]] if not ids: - context.log.warning( - "No listing details found – run funda_listing_details first." - ) + context.log.warning("No listing details found – run funda_listing_details first.") return MaterializeResult(metadata={"count": 0}) context.log.info(f"Fetching price history for {len(ids)} listings …") # Truncate before inserting with engine.begin() as conn: - conn.execute( - __import__("sqlalchemy").text(f"TRUNCATE TABLE {_SCHEMA}.price_history") - ) + conn.execute(text(f"TRUNCATE TABLE {_SCHEMA}.price_history")) rows = [] errors = 0 @@ -507,8 +486,7 @@ def funda_price_history( postgres.execute_many(insert_sql, rows) context.log.info( - f"Inserted {len(rows)} price history records ({errors} errors) " - f"into {_SCHEMA}.price_history" + f"Inserted {len(rows)} price history records ({errors} errors) into {_SCHEMA}.price_history" ) return MaterializeResult( diff --git a/data_platform/definitions.py b/data_platform/definitions.py index fed4a74..8708a6a 100644 --- a/data_platform/definitions.py +++ b/data_platform/definitions.py @@ -26,6 +26,7 @@ dbt_project.prepare_if_dev() # dbt assets – every dbt model/test/snapshot becomes a Dagster asset # --------------------------------------------------------------------------- + @dbt_assets(manifest=dbt_project.manifest_path) def dbt_project_assets(context, dbt: DbtCliResource): yield from dbt.cli(["build"], context=context).stream() diff --git a/data_platform/resources.py b/data_platform/resources.py index d47eab4..11d064f 100644 --- a/data_platform/resources.py +++ b/data_platform/resources.py @@ -2,7 +2,7 @@ import os -from dagster import ConfigurableResource, EnvVar +from dagster import ConfigurableResource from funda import Funda from sqlalchemy import create_engine, text diff --git a/pyproject.toml b/pyproject.toml index 4fdbd11..ec51158 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,4 +27,35 @@ attribute = "defs" dev = [ "pytest", "dagster-webserver", + "pre-commit", + "ruff", + "sqlfluff", + "sqlfluff-templater-dbt", ] + +[tool.ruff] +target-version = "py311" +line-length = 100 + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "SIM", # flake8-simplify + "C4", # flake8-comprehensions +] +ignore = [ + "E501", # line too long – handled by formatter + "B008", # function call in default argument (Dagster configs use this) +] + +[tool.ruff.lint.isort] +known-first-party = ["data_platform"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "-v" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..60ff38d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,35 @@ +"""Shared test fixtures.""" + +from unittest.mock import MagicMock + + +def make_mock_engine(select_rows: list[tuple] | None = None): + """Return a mock SQLAlchemy engine. + + Args: + select_rows: Rows to return from *connect()* (SELECT queries). + Defaults to an empty list. + """ + select_rows = select_rows or [] + + engine = MagicMock() + + # engine.begin() context manager → conn for DDL / writes + write_conn = MagicMock() + engine.begin.return_value.__enter__ = MagicMock(return_value=write_conn) + engine.begin.return_value.__exit__ = MagicMock(return_value=False) + + # engine.connect() context manager → conn for SELECTs + read_conn = MagicMock() + read_conn.execute.return_value = iter(select_rows) + engine.connect.return_value.__enter__ = MagicMock(return_value=read_conn) + engine.connect.return_value.__exit__ = MagicMock(return_value=False) + + return engine, write_conn, read_conn + + +def make_mock_listing(data: dict): + """Return a mock pyfunda Listing-like object.""" + listing = MagicMock() + listing.to_dict.return_value = data + return listing diff --git a/tests/test_assets_funda.py b/tests/test_assets_funda.py new file mode 100644 index 0000000..4c2b861 --- /dev/null +++ b/tests/test_assets_funda.py @@ -0,0 +1,336 @@ +"""Tests for Funda Dagster assets using mocked external dependencies.""" + +from unittest.mock import MagicMock + +from dagster import materialize + +from data_platform.assets.funda import ( + FundaSearchConfig, + funda_listing_details, + funda_price_history, + funda_search_results, +) +from tests.conftest import make_mock_engine, make_mock_listing + +# --------------------------------------------------------------------------- +# Duck-typed mock resources (bypass frozen-Pydantic ConfigurableResource) +# --------------------------------------------------------------------------- + + +class MockFundaResource: + """Minimal test double for FundaResource.""" + + def __init__(self, client): + self._client = client + + def get_client(self): + return self._client + + +class MockPostgresResource: + """Minimal test double for PostgresResource.""" + + def __init__(self, engine=None, inserted_rows: list | None = None): + self._engine = engine or make_mock_engine()[0] + self._inserted_rows = inserted_rows if inserted_rows is not None else [] + + def get_engine(self): + return self._engine + + def execute(self, statement, params=None): + pass + + def execute_many(self, statement, rows): + self._inserted_rows.extend(rows) + + +# --------------------------------------------------------------------------- +# Shared listing data +# --------------------------------------------------------------------------- + +_SEARCH_LISTING_DATA = { + "global_id": "1234567", + "title": "Teststraat 1", + "city": "Amsterdam", + "postcode": "1234AB", + "province": "Noord-Holland", + "neighbourhood": "Centrum", + "price": 350000, + "living_area": 80, + "plot_area": None, + "bedrooms": 3, + "rooms": 5, + "energy_label": "A", + "object_type": "apartment", + "offering_type": "buy", + "construction_type": "existing", + "publish_date": "2026-01-15", + "broker_id": "999", + "broker_name": "Test Makelaars", +} + +_DETAIL_LISTING_DATA = { + **_SEARCH_LISTING_DATA, + "tiny_id": "87654321", + "municipality": "Amsterdam", + "price_formatted": "\u20ac 350.000 k.k.", + "status": "available", + "house_type": "Appartement", + "construction_year": "1985", + "description": "A lovely apartment.", + "publication_date": "2026-01-15", + "latitude": 52.37, + "longitude": 4.89, + "has_garden": False, + "has_balcony": True, + "has_solar_panels": False, + "has_heat_pump": False, + "has_roof_terrace": False, + "is_energy_efficient": True, + "is_monument": False, + "url": "https://www.funda.nl/detail/koop/amsterdam/app/87654321/", + "photo_count": 12, + "views": 150, + "saves": 30, +} + + +# --------------------------------------------------------------------------- +# funda_search_results +# --------------------------------------------------------------------------- + + +class TestFundaSearchResults: + def _run(self, mock_client, inserted_rows=None, config=None): + engine, _, _ = make_mock_engine() + rows = inserted_rows if inserted_rows is not None else [] + result = materialize( + [funda_search_results], + resources={ + "funda": MockFundaResource(mock_client), + "postgres": MockPostgresResource(engine, rows), + }, + run_config={ + "ops": {"funda_search_results": {"config": {"max_pages": 1, **(config or {})}}} + }, + ) + return result + + def test_no_results_returns_count_zero(self): + client = MagicMock() + client.search_listing.return_value = [] + result = self._run(client) + assert result.success + mat = result.asset_materializations_for_node("funda_search_results") + assert mat[0].metadata["count"].value == 0 + + def test_results_are_inserted(self): + client = MagicMock() + client.search_listing.return_value = [make_mock_listing(_SEARCH_LISTING_DATA)] + rows = [] + result = self._run(client, inserted_rows=rows) + assert result.success + assert len(rows) == 1 + assert rows[0]["city"] == "Amsterdam" + assert rows[0]["price"] == 350000 + + def test_pagination_stops_on_empty_page(self): + client = MagicMock() + client.search_listing.side_effect = [ + [make_mock_listing(_SEARCH_LISTING_DATA)], + [], + ] + inserted = [] + result = materialize( + [funda_search_results], + resources={ + "funda": MockFundaResource(client), + "postgres": MockPostgresResource(make_mock_engine()[0], inserted), + }, + run_config={"ops": {"funda_search_results": {"config": {"max_pages": 3}}}}, + ) + assert result.success + assert client.search_listing.call_count == 2 + assert len(inserted) == 1 + + def test_location_split_by_comma(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"location": "amsterdam, rotterdam"}) + call_kwargs = client.search_listing.call_args[1] + assert call_kwargs["location"] == ["amsterdam", "rotterdam"] + + def test_price_max_forwarded(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"price_max": 500000}) + assert client.search_listing.call_args[1]["price_max"] == 500000 + + def test_price_min_forwarded(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"price_min": 200000}) + assert client.search_listing.call_args[1]["price_min"] == 200000 + + def test_area_min_forwarded(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"area_min": 50}) + assert client.search_listing.call_args[1]["area_min"] == 50 + + def test_radius_km_forwarded(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"location": "1012AB", "radius_km": 10}) + assert client.search_listing.call_args[1]["radius_km"] == 10 + + def test_object_type_split_by_comma(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"object_type": "house, apartment"}) + assert client.search_listing.call_args[1]["object_type"] == ["house", "apartment"] + + def test_energy_label_split_by_comma(self): + client = MagicMock() + client.search_listing.return_value = [] + self._run(client, config={"energy_label": "A, A+"}) + assert client.search_listing.call_args[1]["energy_label"] == ["A", "A+"] + + +# --------------------------------------------------------------------------- +# funda_listing_details +# --------------------------------------------------------------------------- + + +class TestFundaListingDetails: + def _run(self, mock_client, engine, inserted_rows=None): + rows = inserted_rows if inserted_rows is not None else [] + return materialize( + [funda_listing_details], + resources={ + "funda": MockFundaResource(mock_client), + "postgres": MockPostgresResource(engine, rows), + }, + ) + + def test_no_search_results_returns_count_zero(self): + engine, _, _ = make_mock_engine(select_rows=[]) + client = MagicMock() + result = self._run(client, engine) + assert result.success + mat = result.asset_materializations_for_node("funda_listing_details") + assert mat[0].metadata["count"].value == 0 + + def test_details_fetched_and_inserted(self): + engine, _, _ = make_mock_engine(select_rows=[("1234567",)]) + client = MagicMock() + client.get_listing.return_value = make_mock_listing(_DETAIL_LISTING_DATA) + inserted = [] + result = self._run(client, engine, inserted) + assert result.success + assert len(inserted) == 1 + assert inserted[0]["city"] == "Amsterdam" + assert inserted[0]["status"] == "available" + assert inserted[0]["has_balcony"] is True + assert inserted[0]["has_garden"] is False + + def test_failed_fetch_counted_as_error(self): + engine, _, _ = make_mock_engine(select_rows=[("1234567",), ("9999999",)]) + client = MagicMock() + client.get_listing.side_effect = [ + make_mock_listing(_DETAIL_LISTING_DATA), + RuntimeError("API error"), + ] + inserted = [] + result = self._run(client, engine, inserted) + assert result.success + mat = result.asset_materializations_for_node("funda_listing_details") + assert mat[0].metadata["errors"].value == 1 + assert len(inserted) == 1 + + +# --------------------------------------------------------------------------- +# funda_price_history +# --------------------------------------------------------------------------- + + +class TestFundaPriceHistory: + def _run(self, mock_client, engine, inserted_rows=None): + rows = inserted_rows if inserted_rows is not None else [] + return materialize( + [funda_price_history], + resources={ + "funda": MockFundaResource(mock_client), + "postgres": MockPostgresResource(engine, rows), + }, + ) + + def test_no_details_returns_count_zero(self): + engine, _, _ = make_mock_engine(select_rows=[]) + client = MagicMock() + result = self._run(client, engine) + assert result.success + mat = result.asset_materializations_for_node("funda_price_history") + assert mat[0].metadata["count"].value == 0 + + def test_price_history_inserted(self): + engine, _, _ = make_mock_engine(select_rows=[("1234567",)]) + client = MagicMock() + client.get_listing.return_value = make_mock_listing(_DETAIL_LISTING_DATA) + client.get_price_history.return_value = [ + { + "price": 350000, + "human_price": "\u20ac350.000", + "date": "1 jan, 2026", + "timestamp": "2026-01-01T00:00:00", + "source": "Funda", + "status": "asking_price", + }, + { + "price": 320000, + "human_price": "\u20ac320.000", + "date": "1 jan, 2024", + "timestamp": "2024-01-01T00:00:00", + "source": "WOZ", + "status": "woz", + }, + ] + inserted = [] + result = self._run(client, engine, inserted) + assert result.success + assert len(inserted) == 2 + assert inserted[0]["source"] == "Funda" + assert inserted[1]["source"] == "WOZ" + mat = result.asset_materializations_for_node("funda_price_history") + assert mat[0].metadata["count"].value == 2 + + +# --------------------------------------------------------------------------- +# FundaSearchConfig +# --------------------------------------------------------------------------- + + +class TestFundaSearchConfig: + def test_defaults(self): + cfg = FundaSearchConfig() + assert cfg.location == "amsterdam" + assert cfg.offering_type == "buy" + assert cfg.sort == "newest" + assert cfg.max_pages == 3 + assert cfg.price_min is None + + def test_custom_values(self): + cfg = FundaSearchConfig( + location="rotterdam", + offering_type="rent", + price_max=2000, + max_pages=1, + ) + assert cfg.location == "rotterdam" + assert cfg.offering_type == "rent" + assert cfg.price_max == 2000 + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..6e67cbd --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,154 @@ +"""Tests for pure helper functions in data_platform.assets.funda.""" + +from data_platform.assets.funda import ( + _details_preview_table, + _safe, + _safe_int, + _search_preview_table, +) + +# ── _safe_int ─────────────────────────────────────────────────────────────── + + +class TestSafeInt: + def test_none_returns_none(self): + assert _safe_int(None) is None + + def test_integer_passthrough(self): + assert _safe_int(42) == 42 + + def test_negative_integer(self): + assert _safe_int(-10) == -10 + + def test_zero(self): + assert _safe_int(0) == 0 + + def test_string_int(self): + assert _safe_int("123") == 123 + + def test_float_truncated(self): + assert _safe_int(3.9) == 3 + + def test_float_string(self): + assert _safe_int("7.0") == 7 + + def test_non_numeric_string_returns_none(self): + assert _safe_int("abc") is None + + def test_empty_string_returns_none(self): + assert _safe_int("") is None + + def test_list_returns_none(self): + assert _safe_int([1, 2, 3]) is None + + +# ── _safe ──────────────────────────────────────────────────────────────────── + + +class TestSafe: + def test_dict_becomes_json_string(self): + result = _safe({"key": "val"}) + assert result == '{"key": "val"}' + + def test_list_becomes_json_string(self): + result = _safe([1, 2, 3]) + assert result == "[1, 2, 3]" + + def test_tuple_becomes_json_string(self): + result = _safe((1, 2)) + assert result == "[1, 2]" + + def test_string_passthrough(self): + assert _safe("hello") == "hello" + + def test_integer_passthrough(self): + assert _safe(99) == 99 + + def test_none_passthrough(self): + assert _safe(None) is None + + def test_nested_dict_serialised(self): + data = {"a": {"b": [1, 2]}} + result = _safe(data) + import json + + assert json.loads(result) == data + + +# ── _search_preview_table ──────────────────────────────────────────────────── + + +class TestSearchPreviewTable: + def test_empty_rows_returns_header_only(self): + result = _search_preview_table([]) + lines = result.split("\n") + assert len(lines) == 2 + assert "Title" in lines[0] + assert "---" in lines[1] + + def test_single_row_appears(self): + rows = [ + { + "title": "Teststraat 1", + "city": "Amsterdam", + "price": 350000, + "living_area": 80, + "bedrooms": 3, + } + ] + result = _search_preview_table(rows) + assert "Teststraat 1" in result + assert "Amsterdam" in result + assert "€350,000" in result + assert "80 m²" in result + assert "3" in result + + def test_missing_price_shows_dash(self): + rows = [{"title": "No Price", "city": "Rotterdam", "price": None}] + result = _search_preview_table(rows) + assert "–" in result + + def test_missing_area_shows_dash(self): + rows = [{"title": "No Area", "city": "Utrecht", "living_area": None}] + result = _search_preview_table(rows) + assert "–" in result + + def test_multiple_rows_correct_count(self): + rows = [{"title": f"St {i}", "city": "City", "price": i * 1000} for i in range(5)] + result = _search_preview_table(rows) + lines = result.split("\n") + # header + separator + 5 data rows + assert len(lines) == 7 + + +# ── _details_preview_table ─────────────────────────────────────────────────── + + +class TestDetailsPreviewTable: + def test_empty_rows_returns_header_only(self): + result = _details_preview_table([]) + lines = result.split("\n") + assert len(lines) == 2 + assert "Title" in lines[0] + + def test_row_with_all_fields(self): + rows = [ + { + "title": "Kerkstraat 5", + "city": "Haarlem", + "price": 425000, + "status": "available", + "energy_label": "A", + } + ] + result = _details_preview_table(rows) + assert "Kerkstraat 5" in result + assert "Haarlem" in result + assert "€425,000" in result + assert "available" in result + assert "A" in result + + def test_missing_price_shows_dash(self): + rows = [{"title": "T", "city": "C", "price": None, "status": "sold"}] + result = _details_preview_table(rows) + assert "–" in result diff --git a/tests/test_resources.py b/tests/test_resources.py new file mode 100644 index 0000000..8de95e7 --- /dev/null +++ b/tests/test_resources.py @@ -0,0 +1,88 @@ +"""Tests for data_platform.resources.""" + +from unittest.mock import MagicMock, patch + +from data_platform.resources import FundaResource, PostgresResource + +# ── FundaResource ───────────────────────────────────────────────────────────── + + +class TestFundaResource: + def test_get_client_returns_funda_instance(self): + resource = FundaResource(timeout=10) + from funda import Funda + + client = resource.get_client() + assert isinstance(client, Funda) + + def test_default_timeout(self): + resource = FundaResource() + assert resource.timeout == 30 + + def test_custom_timeout(self): + resource = FundaResource(timeout=60) + assert resource.timeout == 60 + + +# ── PostgresResource ────────────────────────────────────────────────────────── + + +class TestPostgresResource: + def _make_resource(self, **kwargs): + defaults = { + "host": "testhost", + "port": 5432, + "user": "user", + "password": "pw", + "dbname": "db", + } + return PostgresResource(**{**defaults, **kwargs}) + + def test_connection_url_format(self): + res = self._make_resource() + # Patch at the module level so the frozen instance isn't mutated + with patch("data_platform.resources.create_engine") as mock_create: + mock_create.return_value = MagicMock() + res.get_engine() + call_url = mock_create.call_args[0][0] + assert "testhost" in call_url + assert "5432" in call_url + assert "user" in call_url + assert "pw" in call_url + assert "db" in call_url + + def test_connection_url_scheme(self): + res = self._make_resource() + with patch("data_platform.resources.create_engine") as mock_create: + mock_create.return_value = MagicMock() + res.get_engine() + call_url = mock_create.call_args[0][0] + assert call_url.startswith("postgresql://") + + def test_execute_calls_engine_begin(self): + """execute() wraps its statement in engine.begin().""" + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False) + + # Patch create_engine at module level so that get_engine() returns our mock + with patch("data_platform.resources.create_engine", return_value=mock_engine): + res = self._make_resource() + res.execute("SELECT 1") + + mock_conn.execute.assert_called_once() + + def test_execute_many_calls_engine_begin(self): + """execute_many() wraps its statement in engine.begin().""" + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.begin.return_value.__exit__ = MagicMock(return_value=False) + + with patch("data_platform.resources.create_engine", return_value=mock_engine): + res = self._make_resource() + rows = [{"id": 1}, {"id": 2}] + res.execute_many("INSERT INTO t VALUES (:id)", rows) + + mock_conn.execute.assert_called_once()