chore(docs): clean comment format

2026-03-04 14:51:27 +00:00
parent 11faf2beab
commit ef31fb812f
10 changed files with 25 additions and 134 deletions
--- a/data_platform/assets/dbt.py
+++ b/data_platform/assets/dbt.py
@@ -9,11 +9,11 @@ DBT_PROJECT_DIR = Path(__file__).parent.parent.parent / "dbt"

 dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))

-# When running locally outside Docker, generate/refresh the manifest automatically.
+# Generate or refresh the manifest when running locally outside Docker.
 dbt_project.prepare_if_dev()


@dbt_assets(manifest=dbt_project.manifest_path)
 def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
-    """Every dbt model/test/snapshot becomes a Dagster asset."""
+    """Expose every dbt model, test, and snapshot as a Dagster asset."""
    yield from dbt.cli(["build"], context=context).stream()
--- a/data_platform/assets/funda.py
+++ b/data_platform/assets/funda.py
@@ -1,12 +1,4 @@
-"""Dagster assets for Funda real-estate data ingestion.
-
-Three assets form a pipeline:
-
-    funda_search_results  →  funda_listing_details  →  funda_price_history
-
-Each asset is configurable from the Dagster launchpad so search
-parameters (location, price range, etc.) can be tweaked per run.
-"""
+"""Funda real-estate ingestion assets."""

 import json

@@ -27,13 +19,9 @@ from data_platform.helpers import (
 )
 from data_platform.resources import FundaResource, PostgresResource

-# ---------------------------------------------------------------------------
-# Launchpad config schemas
-# ---------------------------------------------------------------------------
-

 class FundaSearchConfig(Config):
-    """Launchpad parameters for the Funda search asset."""
+    """Search parameters for Funda."""

    location: str = "woerden, utrecht, zeist, maarssen, nieuwegein, gouda"
    offering_type: str = "buy"
@@ -43,29 +31,25 @@ class FundaSearchConfig(Config):
    area_max: int | None = None
    plot_min: int | None = None
    plot_max: int | None = None
-    object_type: str | None = None  # comma-separated, e.g. "house,apartment"
-    energy_label: str | None = None  # comma-separated, e.g. "A,A+,A++"
+    object_type: str | None = None
+    energy_label: str | None = None
    radius_km: int | None = None
    sort: str = "newest"
    max_pages: int = 3


 class FundaDetailsConfig(Config):
-    """Launchpad parameters for the listing-details asset."""
+    """Launchpad config for the listing-details asset."""

-    fetch_all: bool = True  # fetch details for every search result
+    fetch_all: bool = True


 class FundaPriceHistoryConfig(Config):
-    """Launchpad parameters for the price-history asset."""
+    """Launchpad config for the price-history asset."""

-    fetch_all: bool = True  # fetch price history for every detailed listing
+    fetch_all: bool = True


-# ---------------------------------------------------------------------------
-# SQL helpers
-# ---------------------------------------------------------------------------
-
 _SCHEMA = "raw_funda"

 _DDL_SEARCH = f"""
@@ -152,8 +136,7 @@ CREATE TABLE IF NOT EXISTS {_SCHEMA}.price_history (
 );
 """

-# Idempotent constraint migrations for tables created before the UNIQUE clauses.
-# Deduplicates existing rows (keeps the most recent) before adding the constraint.
+# Idempotent migration for pre-UNIQUE tables: dedupe rows (keep most recent), add constraint.
 _MIGRATE_SEARCH_CONSTRAINT = f"""
 DO $$
 BEGIN
@@ -216,11 +199,6 @@ END $$;
 """


-# ---------------------------------------------------------------------------
-# Assets
-# ---------------------------------------------------------------------------
-
-
@asset(
    group_name="funda",
    kinds={"python", "postgres"},
@@ -234,7 +212,6 @@ def funda_search_results(
 ) -> MaterializeResult:
    client = funda.get_client()

-    # Build search kwargs from launchpad config
    kwargs: dict = {
        "location": [loc.strip() for loc in config.location.split(",")],
        "offering_type": config.offering_type,
@@ -259,7 +236,6 @@ def funda_search_results(
    if config.radius_km is not None:
        kwargs["radius_km"] = config.radius_km

-    # Paginate through results
    all_listings = []
    for page in range(config.max_pages):
        context.log.info(f"Fetching search page {page + 1}/{config.max_pages} …")
@@ -275,7 +251,6 @@ def funda_search_results(
        context.log.warning("Search returned zero results.")
        return MaterializeResult(metadata={"count": 0})

-    # Write to Postgres
    engine = postgres.get_engine()
    with engine.begin() as conn:
        conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}"))
@@ -389,7 +364,6 @@ def funda_listing_details(
        conn.execute(text(_DDL_DETAILS))
        conn.execute(text(_MIGRATE_DETAILS_CONSTRAINT))

-    # Read listing IDs from search results
    with engine.connect() as conn:
        result = conn.execute(
            text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.search_results")
@@ -564,7 +538,6 @@ def funda_price_history(
        conn.execute(text(_DDL_PRICE_HISTORY))
        conn.execute(text(_MIGRATE_PRICE_HISTORY_CONSTRAINT))

-    # Read listings from details table
    with engine.connect() as conn:
        result = conn.execute(
            text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.listing_details")
@@ -583,7 +556,6 @@ def funda_price_history(
    errors = 0
    for i, gid in enumerate(ids):
        try:
-            # get_price_history needs a Listing object, so fetch it first
            listing = client.get_listing(int(gid))
            history = client.get_price_history(listing)
            for entry in history:
--- a/data_platform/definitions.py
+++ b/data_platform/definitions.py
@@ -10,10 +10,6 @@ from data_platform.assets.funda import (
 from data_platform.resources import FundaResource, PostgresResource
 from data_platform.schedules import funda_ingestion_job, funda_ingestion_schedule

-# ---------------------------------------------------------------------------
-# Definitions
-# ---------------------------------------------------------------------------
-
 defs = Definitions(
    assets=[
        dbt_project_assets,
--- a/data_platform/helpers/init.py
+++ b/data_platform/helpers/init.py
@@ -1,10 +1,10 @@
-"""Shared helper utilities for Dagster assets."""
+"""Shared helper utilities."""

 import json


 def safe(val):
-    """Convert non-serialisable values (tuples, lists of dicts, etc.) for JSONB."""
+    """Convert non-serialisable values for JSONB storage."""
    if isinstance(val, list | dict | tuple):
        return json.dumps(val, default=str)
    return val
@@ -28,15 +28,7 @@ def md_preview_table(
    columns: list[tuple[str, str]],
    formatters: dict[str, callable] | None = None,
 ) -> str:
-    """Build a markdown table from a list of row dicts.
-
-    Args:
-        rows: List of dictionaries containing row data.
-        columns: List of (key, header_label) tuples defining the columns.
-        formatters: Optional dict mapping column keys to formatting callables.
-            Each callable receives the raw value and returns a display string.
-            Columns without a formatter fall back to the raw value or "–".
-    """
+    """Build a markdown table from a list of row dicts."""
    formatters = formatters or {}
    headers = [label for _, label in columns]
    lines = [
@@ -56,10 +48,10 @@ def md_preview_table(


 def format_euro(val) -> str:
-    """Format an integer as €-prefixed with thousands separators, or '–'."""
+    """Format an integer as a €-prefixed amount with thousands separators, or '–' if falsy."""
    return f"€{val:,}" if val else "–"


 def format_area(val) -> str:
-    """Format an integer as m² area, or '–'."""
+    """Format an integer as an area in m², or '–' if falsy."""
    return f"{val} m²" if val else "–"
--- a/data_platform/resources/init.py
+++ b/data_platform/resources/init.py
@@ -1,4 +1,4 @@
-"""Shared Dagster resources for the data platform."""
+"""Dagster resources."""

 import os

--- a/data_platform/schedules.py
+++ b/data_platform/schedules.py
@@ -1,4 +1,4 @@
-"""Dagster jobs and schedules for the data platform."""
+"""Dagster jobs and schedules."""

 from dagster import (
    AssetSelection,
@@ -13,10 +13,6 @@ from data_platform.assets.funda import (
    FundaSearchConfig,
 )

-# ---------------------------------------------------------------------------
-# Jobs
-# ---------------------------------------------------------------------------
-
 funda_ingestion_job = define_asset_job(
    name="funda_ingestion",
    selection=AssetSelection.assets(
@@ -24,17 +20,13 @@ funda_ingestion_job = define_asset_job(
        "funda_listing_details",
        "funda_price_history",
    ),
-    description="Run the full Funda ingestion pipeline (search → details → price history).",
+    description="Full Funda ingestion pipeline.",
 )

-# ---------------------------------------------------------------------------
-# Schedules
-# ---------------------------------------------------------------------------
-
 funda_ingestion_schedule = ScheduleDefinition(
    name="funda_ingestion_schedule",
    job=funda_ingestion_job,
-    cron_schedule="0 */4 * * *",  # every 4 hours
+    cron_schedule="0 */4 * * *",
    run_config=RunConfig(
        ops={
            "funda_search_results": FundaSearchConfig(),