chore(docs): clean comment format

This commit is contained in:
Stijnvandenbroek
2026-03-04 14:51:27 +00:00
parent 11faf2beab
commit ef31fb812f
10 changed files with 25 additions and 134 deletions

View File

@@ -9,11 +9,11 @@ DBT_PROJECT_DIR = Path(__file__).parent.parent.parent / "dbt"
dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
# When running locally outside Docker, generate/refresh the manifest automatically.
# Generate manifest locally outside Docker.
dbt_project.prepare_if_dev()
@dbt_assets(manifest=dbt_project.manifest_path)
def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
    """Expose every dbt model/test/snapshot as a Dagster asset.

    Runs ``dbt build`` against the project manifest and streams the CLI
    events back so Dagster materialises each dbt node as it completes.
    """
    yield from dbt.cli(["build"], context=context).stream()

View File

@@ -1,12 +1,4 @@
"""Dagster assets for Funda real-estate data ingestion.
Three assets form a pipeline:
funda_search_results → funda_listing_details → funda_price_history
Each asset is configurable from the Dagster launchpad so search
parameters (location, price range, etc.) can be tweaked per run.
"""
"""Funda real-estate ingestion assets."""
import json
@@ -27,13 +19,9 @@ from data_platform.helpers import (
)
from data_platform.resources import FundaResource, PostgresResource
# ---------------------------------------------------------------------------
# Launchpad config schemas
# ---------------------------------------------------------------------------
class FundaSearchConfig(Config):
"""Launchpad parameters for the Funda search asset."""
"""Search parameters for Funda."""
location: str = "woerden, utrecht, zeist, maarssen, nieuwegein, gouda"
offering_type: str = "buy"
@@ -43,29 +31,25 @@ class FundaSearchConfig(Config):
area_max: int | None = None
plot_min: int | None = None
plot_max: int | None = None
object_type: str | None = None # comma-separated, e.g. "house,apartment"
energy_label: str | None = None # comma-separated, e.g. "A,A+,A++"
object_type: str | None = None
energy_label: str | None = None
radius_km: int | None = None
sort: str = "newest"
max_pages: int = 3
class FundaDetailsConfig(Config):
    """Launchpad config for the Funda listing-details asset."""

    # When True, fetch details for every search result row.
    fetch_all: bool = True
class FundaPriceHistoryConfig(Config):
    """Launchpad config for the Funda price-history asset."""

    # When True, fetch price history for every detailed listing.
    fetch_all: bool = True
# ---------------------------------------------------------------------------
# SQL helpers
# ---------------------------------------------------------------------------
_SCHEMA = "raw_funda"
_DDL_SEARCH = f"""
@@ -152,8 +136,7 @@ CREATE TABLE IF NOT EXISTS {_SCHEMA}.price_history (
);
"""
# Idempotent constraint migrations for tables created before the UNIQUE clauses.
# Deduplicates existing rows (keeps the most recent) before adding the constraint.
# Deduplicate existing rows and add constraints for tables created before UNIQUE clauses.
_MIGRATE_SEARCH_CONSTRAINT = f"""
DO $$
BEGIN
@@ -216,11 +199,6 @@ END $$;
"""
# ---------------------------------------------------------------------------
# Assets
# ---------------------------------------------------------------------------
@asset(
group_name="funda",
kinds={"python", "postgres"},
@@ -234,7 +212,6 @@ def funda_search_results(
) -> MaterializeResult:
client = funda.get_client()
# Build search kwargs from launchpad config
kwargs: dict = {
"location": [loc.strip() for loc in config.location.split(",")],
"offering_type": config.offering_type,
@@ -259,7 +236,6 @@ def funda_search_results(
if config.radius_km is not None:
kwargs["radius_km"] = config.radius_km
# Paginate through results
all_listings = []
for page in range(config.max_pages):
context.log.info(f"Fetching search page {page + 1}/{config.max_pages}")
@@ -275,7 +251,6 @@ def funda_search_results(
context.log.warning("Search returned zero results.")
return MaterializeResult(metadata={"count": 0})
# Write to Postgres
engine = postgres.get_engine()
with engine.begin() as conn:
conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}"))
@@ -389,7 +364,6 @@ def funda_listing_details(
conn.execute(text(_DDL_DETAILS))
conn.execute(text(_MIGRATE_DETAILS_CONSTRAINT))
# Read listing IDs from search results
with engine.connect() as conn:
result = conn.execute(
text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.search_results")
@@ -564,7 +538,6 @@ def funda_price_history(
conn.execute(text(_DDL_PRICE_HISTORY))
conn.execute(text(_MIGRATE_PRICE_HISTORY_CONSTRAINT))
# Read listings from details table
with engine.connect() as conn:
result = conn.execute(
text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.listing_details")
@@ -583,7 +556,6 @@ def funda_price_history(
errors = 0
for i, gid in enumerate(ids):
try:
# get_price_history needs a Listing object, so fetch it first
listing = client.get_listing(int(gid))
history = client.get_price_history(listing)
for entry in history:

View File

@@ -10,10 +10,6 @@ from data_platform.assets.funda import (
from data_platform.resources import FundaResource, PostgresResource
from data_platform.schedules import funda_ingestion_job, funda_ingestion_schedule
# ---------------------------------------------------------------------------
# Definitions
# ---------------------------------------------------------------------------
defs = Definitions(
assets=[
dbt_project_assets,

View File

@@ -1,10 +1,10 @@
"""Shared helper utilities for Dagster assets."""
"""Shared helper utilities."""
import json
def safe(val):
"""Convert non-serialisable values (tuples, lists of dicts, etc.) for JSONB."""
"""Convert non-serialisable values for JSONB storage."""
if isinstance(val, list | dict | tuple):
return json.dumps(val, default=str)
return val
@@ -28,15 +28,7 @@ def md_preview_table(
columns: list[tuple[str, str]],
formatters: dict[str, callable] | None = None,
) -> str:
"""Build a markdown table from a list of row dicts.
Args:
rows: List of dictionaries containing row data.
columns: List of (key, header_label) tuples defining the columns.
formatters: Optional dict mapping column keys to formatting callables.
Each callable receives the raw value and returns a display string.
Columns without a formatter fall back to the raw value or "".
"""
"""Build a markdown table from a list of row dicts."""
formatters = formatters or {}
headers = [label for _, label in columns]
lines = [
@@ -56,10 +48,10 @@ def md_preview_table(
def format_euro(val) -> str:
    """Format an amount with thousands separators, or '' for falsy values.

    NOTE(review): despite the name, no "€" prefix is emitted here —
    presumably the caller adds it; confirm before adding one.
    """
    return f"{val:,}" if val else ""
def format_area(val) -> str:
    """Format an area value as a string, or '' for falsy values.

    NOTE(review): despite the name, no "m²" suffix is emitted here —
    presumably the caller appends it; confirm before adding one.
    """
    return f"{val}" if val else ""

View File

@@ -1,4 +1,4 @@
"""Shared Dagster resources for the data platform."""
"""Dagster resources."""
import os

View File

@@ -1,4 +1,4 @@
"""Dagster jobs and schedules for the data platform."""
"""Dagster jobs and schedules."""
from dagster import (
AssetSelection,
@@ -13,10 +13,6 @@ from data_platform.assets.funda import (
FundaSearchConfig,
)
# ---------------------------------------------------------------------------
# Jobs
# ---------------------------------------------------------------------------
funda_ingestion_job = define_asset_job(
name="funda_ingestion",
selection=AssetSelection.assets(
@@ -24,17 +20,13 @@ funda_ingestion_job = define_asset_job(
"funda_listing_details",
"funda_price_history",
),
description="Run the full Funda ingestion pipeline (search → details → price history).",
description="Full Funda ingestion pipeline.",
)
# ---------------------------------------------------------------------------
# Schedules
# ---------------------------------------------------------------------------
funda_ingestion_schedule = ScheduleDefinition(
name="funda_ingestion_schedule",
job=funda_ingestion_job,
cron_schedule="0 */4 * * *", # every 4 hours
cron_schedule="0 */4 * * *",
run_config=RunConfig(
ops={
"funda_search_results": FundaSearchConfig(),