From ef31fb812f476b1942f381a30d99ce816ebb9d00 Mon Sep 17 00:00:00 2001 From: Stijnvandenbroek Date: Wed, 4 Mar 2026 14:51:27 +0000 Subject: [PATCH] chore(docs): clean comment format --- data_platform/assets/dbt.py | 4 +-- data_platform/assets/funda.py | 46 ++++++----------------------- data_platform/definitions.py | 4 --- data_platform/helpers/__init__.py | 18 ++++------- data_platform/resources/__init__.py | 2 +- data_platform/schedules.py | 14 ++------- tests/conftest.py | 9 +----- tests/test_assets_funda.py | 39 ++---------------------- tests/test_helpers.py | 16 +--------- tests/test_resources.py | 7 ----- 10 files changed, 25 insertions(+), 134 deletions(-) diff --git a/data_platform/assets/dbt.py b/data_platform/assets/dbt.py index fa5d3f9..58aeac4 100644 --- a/data_platform/assets/dbt.py +++ b/data_platform/assets/dbt.py @@ -9,11 +9,11 @@ DBT_PROJECT_DIR = Path(__file__).parent.parent.parent / "dbt" dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR)) -# When running locally outside Docker, generate/refresh the manifest automatically. +# Generate manifest locally outside Docker. dbt_project.prepare_if_dev() @dbt_assets(manifest=dbt_project.manifest_path) def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource): - """Every dbt model/test/snapshot becomes a Dagster asset.""" + """Expose every dbt model as a Dagster asset.""" yield from dbt.cli(["build"], context=context).stream() diff --git a/data_platform/assets/funda.py b/data_platform/assets/funda.py index 4ae33b2..2c5435c 100644 --- a/data_platform/assets/funda.py +++ b/data_platform/assets/funda.py @@ -1,12 +1,4 @@ -"""Dagster assets for Funda real-estate data ingestion. - -Three assets form a pipeline: - - funda_search_results → funda_listing_details → funda_price_history - -Each asset is configurable from the Dagster launchpad so search -parameters (location, price range, etc.) can be tweaked per run. -""" +"""Funda real-estate ingestion assets.""" import json @@ -27,13 +19,9 @@ from data_platform.helpers import ( ) from data_platform.resources import FundaResource, PostgresResource -# --------------------------------------------------------------------------- -# Launchpad config schemas -# --------------------------------------------------------------------------- - class FundaSearchConfig(Config): - """Launchpad parameters for the Funda search asset.""" + """Search parameters for Funda.""" location: str = "woerden, utrecht, zeist, maarssen, nieuwegein, gouda" offering_type: str = "buy" @@ -43,29 +31,25 @@ class FundaSearchConfig(Config): area_max: int | None = None plot_min: int | None = None plot_max: int | None = None - object_type: str | None = None # comma-separated, e.g. "house,apartment" - energy_label: str | None = None # comma-separated, e.g. "A,A+,A++" + object_type: str | None = None + energy_label: str | None = None radius_km: int | None = None sort: str = "newest" max_pages: int = 3 class FundaDetailsConfig(Config): - """Launchpad parameters for the listing-details asset.""" + """Config for listing details fetch.""" - fetch_all: bool = True # fetch details for every search result + fetch_all: bool = True class FundaPriceHistoryConfig(Config): - """Launchpad parameters for the price-history asset.""" + """Config for price history fetch.""" - fetch_all: bool = True # fetch price history for every detailed listing + fetch_all: bool = True -# --------------------------------------------------------------------------- -# SQL helpers -# --------------------------------------------------------------------------- - _SCHEMA = "raw_funda" _DDL_SEARCH = f""" @@ -152,8 +136,7 @@ CREATE TABLE IF NOT EXISTS {_SCHEMA}.price_history ( ); """ -# Idempotent constraint migrations for tables created before the UNIQUE clauses. -# Deduplicates existing rows (keeps the most recent) before adding the constraint. +# Deduplicate existing rows and add constraints for tables created before UNIQUE clauses. _MIGRATE_SEARCH_CONSTRAINT = f""" DO $$ BEGIN @@ -216,11 +199,6 @@ END $$; """ -# --------------------------------------------------------------------------- -# Assets -# --------------------------------------------------------------------------- - - @asset( group_name="funda", kinds={"python", "postgres"}, @@ -234,7 +212,6 @@ def funda_search_results( ) -> MaterializeResult: client = funda.get_client() - # Build search kwargs from launchpad config kwargs: dict = { "location": [loc.strip() for loc in config.location.split(",")], "offering_type": config.offering_type, @@ -259,7 +236,6 @@ def funda_search_results( if config.radius_km is not None: kwargs["radius_km"] = config.radius_km - # Paginate through results all_listings = [] for page in range(config.max_pages): context.log.info(f"Fetching search page {page + 1}/{config.max_pages} …") @@ -275,7 +251,6 @@ def funda_search_results( context.log.warning("Search returned zero results.") return MaterializeResult(metadata={"count": 0}) - # Write to Postgres engine = postgres.get_engine() with engine.begin() as conn: conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}")) @@ -389,7 +364,6 @@ def funda_listing_details( conn.execute(text(_DDL_DETAILS)) conn.execute(text(_MIGRATE_DETAILS_CONSTRAINT)) - # Read listing IDs from search results with engine.connect() as conn: result = conn.execute( text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.search_results") @@ -564,7 +538,6 @@ def funda_price_history( conn.execute(text(_DDL_PRICE_HISTORY)) conn.execute(text(_MIGRATE_PRICE_HISTORY_CONSTRAINT)) - # Read listings from details table with engine.connect() as conn: result = conn.execute( text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.listing_details") @@ -583,7 +556,6 @@ def funda_price_history( errors = 0 for i, gid in enumerate(ids): try: - # get_price_history needs a Listing object, so fetch it first listing = client.get_listing(int(gid)) history = client.get_price_history(listing) for entry in history: diff --git a/data_platform/definitions.py b/data_platform/definitions.py index 3d7c539..cf3a474 100644 --- a/data_platform/definitions.py +++ b/data_platform/definitions.py @@ -10,10 +10,6 @@ from data_platform.assets.funda import ( from data_platform.resources import FundaResource, PostgresResource from data_platform.schedules import funda_ingestion_job, funda_ingestion_schedule -# --------------------------------------------------------------------------- -# Definitions -# --------------------------------------------------------------------------- - defs = Definitions( assets=[ dbt_project_assets, diff --git a/data_platform/helpers/__init__.py b/data_platform/helpers/__init__.py index 36f1a5a..31978cc 100644 --- a/data_platform/helpers/__init__.py +++ b/data_platform/helpers/__init__.py @@ -1,10 +1,10 @@ -"""Shared helper utilities for Dagster assets.""" +"""Shared helper utilities.""" import json def safe(val): - """Convert non-serialisable values (tuples, lists of dicts, etc.) for JSONB.""" + """Convert non-serialisable values for JSONB storage.""" if isinstance(val, list | dict | tuple): return json.dumps(val, default=str) return val @@ -28,15 +28,7 @@ def md_preview_table( columns: list[tuple[str, str]], formatters: dict[str, callable] | None = None, ) -> str: - """Build a markdown table from a list of row dicts. - - Args: - rows: List of dictionaries containing row data. - columns: List of (key, header_label) tuples defining the columns. - formatters: Optional dict mapping column keys to formatting callables. - Each callable receives the raw value and returns a display string. - Columns without a formatter fall back to the raw value or "–". - """ + """Build a markdown table from a list of row dicts.""" formatters = formatters or {} headers = [label for _, label in columns] lines = [ @@ -56,10 +48,10 @@ def md_preview_table( def format_euro(val) -> str: - """Format an integer as €-prefixed with thousands separators, or '–'.""" + """Format an integer as €-prefixed, or '–'.""" return f"€{val:,}" if val else "–" def format_area(val) -> str: - """Format an integer as m² area, or '–'.""" + """Format an integer as m², or '–'.""" return f"{val} m²" if val else "–" diff --git a/data_platform/resources/__init__.py b/data_platform/resources/__init__.py index 11d064f..915a0d4 100644 --- a/data_platform/resources/__init__.py +++ b/data_platform/resources/__init__.py @@ -1,4 +1,4 @@ -"""Shared Dagster resources for the data platform.""" +"""Dagster resources.""" import os diff --git a/data_platform/schedules.py b/data_platform/schedules.py index 400113a..822d702 100644 --- a/data_platform/schedules.py +++ b/data_platform/schedules.py @@ -1,4 +1,4 @@ -"""Dagster jobs and schedules for the data platform.""" +"""Dagster jobs and schedules.""" from dagster import ( AssetSelection, @@ -13,10 +13,6 @@ from data_platform.assets.funda import ( FundaSearchConfig, ) -# --------------------------------------------------------------------------- -# Jobs -# --------------------------------------------------------------------------- - funda_ingestion_job = define_asset_job( name="funda_ingestion", selection=AssetSelection.assets( @@ -24,17 +20,13 @@ funda_ingestion_job = define_asset_job( "funda_listing_details", "funda_price_history", ), - description="Run the full Funda ingestion pipeline (search → details → price history).", + description="Full Funda ingestion pipeline.", ) -# --------------------------------------------------------------------------- -# Schedules -# --------------------------------------------------------------------------- - funda_ingestion_schedule = ScheduleDefinition( name="funda_ingestion_schedule", job=funda_ingestion_job, - cron_schedule="0 */4 * * *", # every 4 hours + cron_schedule="0 */4 * * *", run_config=RunConfig( ops={ "funda_search_results": FundaSearchConfig(), diff --git a/tests/conftest.py b/tests/conftest.py index 60ff38d..4c924fa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,22 +4,15 @@ from unittest.mock import MagicMock def make_mock_engine(select_rows: list[tuple] | None = None): - """Return a mock SQLAlchemy engine. - - Args: - select_rows: Rows to return from *connect()* (SELECT queries). - Defaults to an empty list. - """ + """Return a mock SQLAlchemy engine.""" select_rows = select_rows or [] engine = MagicMock() - # engine.begin() context manager → conn for DDL / writes write_conn = MagicMock() engine.begin.return_value.__enter__ = MagicMock(return_value=write_conn) engine.begin.return_value.__exit__ = MagicMock(return_value=False) - # engine.connect() context manager → conn for SELECTs read_conn = MagicMock() read_conn.execute.return_value = iter(select_rows) engine.connect.return_value.__enter__ = MagicMock(return_value=read_conn) diff --git a/tests/test_assets_funda.py b/tests/test_assets_funda.py index df79f9e..f063cf3 100644 --- a/tests/test_assets_funda.py +++ b/tests/test_assets_funda.py @@ -1,4 +1,4 @@ -"""Tests for Funda Dagster assets using mocked external dependencies.""" +"""Tests for Funda assets.""" from unittest.mock import MagicMock @@ -12,13 +12,9 @@ from data_platform.assets.funda import ( ) from tests.conftest import make_mock_engine, make_mock_listing -# --------------------------------------------------------------------------- -# Duck-typed mock resources (bypass frozen-Pydantic ConfigurableResource) -# --------------------------------------------------------------------------- - class MockFundaResource: - """Minimal test double for FundaResource.""" + """Test double for FundaResource.""" def __init__(self, client): self._client = client @@ -28,7 +24,7 @@ class MockFundaResource: class MockPostgresResource: - """Minimal test double for PostgresResource.""" + """Test double for PostgresResource.""" def __init__(self, engine=None, inserted_rows: list | None = None): self._engine = engine or make_mock_engine()[0] @@ -44,10 +40,6 @@ class MockPostgresResource: self._inserted_rows.extend(rows) -# --------------------------------------------------------------------------- -# Shared listing data -# --------------------------------------------------------------------------- - _SEARCH_LISTING_DATA = { "global_id": "1234567", "title": "Teststraat 1", @@ -95,11 +87,6 @@ _DETAIL_LISTING_DATA = { } -# --------------------------------------------------------------------------- -# funda_search_results -# --------------------------------------------------------------------------- - - class TestFundaSearchResults: def _run(self, mock_client, inserted_rows=None, config=None): engine, _, _ = make_mock_engine() @@ -204,11 +191,6 @@ class TestFundaSearchResults: assert client.search_listing.call_args[1]["energy_label"] == ["A", "A+"] -# --------------------------------------------------------------------------- -# funda_listing_details -# --------------------------------------------------------------------------- - - class TestFundaListingDetails: def _run(self, mock_client, engine, inserted_rows=None): rows = inserted_rows if inserted_rows is not None else [] @@ -256,11 +238,6 @@ class TestFundaListingDetails: assert len(inserted) == 1 -# --------------------------------------------------------------------------- -# funda_price_history -# --------------------------------------------------------------------------- - - class TestFundaPriceHistory: def _run(self, mock_client, engine, inserted_rows=None): rows = inserted_rows if inserted_rows is not None else [] @@ -312,11 +289,6 @@ class TestFundaPriceHistory: assert mat[0].metadata["count"].value == 2 -# --------------------------------------------------------------------------- -# FundaSearchConfig -# --------------------------------------------------------------------------- - - class TestFundaSearchConfig: def test_defaults(self): cfg = FundaSearchConfig() @@ -337,8 +309,3 @@ class TestFundaSearchConfig: assert cfg.location == "rotterdam" assert cfg.offering_type == "rent" assert cfg.price_max == 2000 - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- diff --git a/tests/test_helpers.py b/tests/test_helpers.py index d71a4d1..a05ac24 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -1,4 +1,4 @@ -"""Tests for pure helper functions in data_platform.assets.helpers.""" +"""Tests for helper functions.""" from data_platform.helpers import ( format_area, @@ -8,8 +8,6 @@ from data_platform.helpers import ( safe_int, ) -# ── safe_int ──────────────────────────────────────────────────────────────── - class TestSafeInt: def test_none_returns_none(self): @@ -43,9 +41,6 @@ class TestSafeInt: assert safe_int([1, 2, 3]) is None -# ── safe ───────────────────────────────────────────────────────────────────── - - class TestSafe: def test_dict_becomes_json_string(self): result = safe({"key": "val"}) @@ -76,9 +71,6 @@ class TestSafe: assert json.loads(result) == data -# ── format_euro ────────────────────────────────────────────────────────────── - - class TestFormatEuro: def test_formats_price(self): assert format_euro(350000) == "€350,000" @@ -90,9 +82,6 @@ class TestFormatEuro: assert format_euro(0) == "–" -# ── format_area ────────────────────────────────────────────────────────────── - - class TestFormatArea: def test_formats_area(self): assert format_area(80) == "80 m²" @@ -104,9 +93,6 @@ class TestFormatArea: assert format_area(0) == "–" -# ── md_preview_table ───────────────────────────────────────────────────────── - - class TestMdPreviewTable: def test_empty_rows_returns_header_only(self): result = md_preview_table([], columns=[("title", "Title"), ("city", "City")]) diff --git a/tests/test_resources.py b/tests/test_resources.py index 8de95e7..f3c6f59 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -4,8 +4,6 @@ from unittest.mock import MagicMock, patch from data_platform.resources import FundaResource, PostgresResource -# ── FundaResource ───────────────────────────────────────────────────────────── - class TestFundaResource: def test_get_client_returns_funda_instance(self): @@ -24,9 +22,6 @@ class TestFundaResource: assert resource.timeout == 60 -# ── PostgresResource ────────────────────────────────────────────────────────── - - class TestPostgresResource: def _make_resource(self, **kwargs): defaults = { @@ -60,7 +55,6 @@ class TestPostgresResource: assert call_url.startswith("postgresql://") def test_execute_calls_engine_begin(self): - """execute() wraps its statement in engine.begin().""" mock_engine = MagicMock() mock_conn = MagicMock() mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn) @@ -74,7 +68,6 @@ class TestPostgresResource: mock_conn.execute.assert_called_once() def test_execute_many_calls_engine_begin(self): - """execute_many() wraps its statement in engine.begin().""" mock_engine = MagicMock() mock_conn = MagicMock() mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn)