chore(docs): clean comment format
This commit is contained in:
@@ -9,11 +9,11 @@ DBT_PROJECT_DIR = Path(__file__).parent.parent.parent / "dbt"
|
||||
|
||||
dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
|
||||
|
||||
# When running locally outside Docker, generate/refresh the manifest automatically.
|
||||
# When running locally outside Docker, generate the manifest.
|
||||
dbt_project.prepare_if_dev()
|
||||
|
||||
|
||||
@dbt_assets(manifest=dbt_project.manifest_path)
|
||||
def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
||||
"""Every dbt model/test/snapshot becomes a Dagster asset."""
|
||||
"""Expose every dbt model as a Dagster asset."""
|
||||
yield from dbt.cli(["build"], context=context).stream()
|
||||
|
||||
@@ -1,12 +1,4 @@
|
||||
"""Dagster assets for Funda real-estate data ingestion.
|
||||
|
||||
Three assets form a pipeline:
|
||||
|
||||
funda_search_results → funda_listing_details → funda_price_history
|
||||
|
||||
Each asset is configurable from the Dagster launchpad so search
|
||||
parameters (location, price range, etc.) can be tweaked per run.
|
||||
"""
|
||||
"""Funda real-estate ingestion assets."""
|
||||
|
||||
import json
|
||||
|
||||
@@ -27,13 +19,9 @@ from data_platform.helpers import (
|
||||
)
|
||||
from data_platform.resources import FundaResource, PostgresResource
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Launchpad config schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class FundaSearchConfig(Config):
|
||||
"""Launchpad parameters for the Funda search asset."""
|
||||
"""Search parameters for Funda."""
|
||||
|
||||
location: str = "woerden, utrecht, zeist, maarssen, nieuwegein, gouda"
|
||||
offering_type: str = "buy"
|
||||
@@ -43,29 +31,25 @@ class FundaSearchConfig(Config):
|
||||
area_max: int | None = None
|
||||
plot_min: int | None = None
|
||||
plot_max: int | None = None
|
||||
object_type: str | None = None # comma-separated, e.g. "house,apartment"
|
||||
energy_label: str | None = None # comma-separated, e.g. "A,A+,A++"
|
||||
object_type: str | None = None
|
||||
energy_label: str | None = None
|
||||
radius_km: int | None = None
|
||||
sort: str = "newest"
|
||||
max_pages: int = 3
|
||||
|
||||
|
||||
class FundaDetailsConfig(Config):
|
||||
"""Launchpad parameters for the listing-details asset."""
|
||||
"""Config for listing details fetch."""
|
||||
|
||||
fetch_all: bool = True # fetch details for every search result
|
||||
fetch_all: bool = True
|
||||
|
||||
|
||||
class FundaPriceHistoryConfig(Config):
|
||||
"""Launchpad parameters for the price-history asset."""
|
||||
"""Config for price history fetch."""
|
||||
|
||||
fetch_all: bool = True # fetch price history for every detailed listing
|
||||
fetch_all: bool = True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQL helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SCHEMA = "raw_funda"
|
||||
|
||||
_DDL_SEARCH = f"""
|
||||
@@ -152,8 +136,7 @@ CREATE TABLE IF NOT EXISTS {_SCHEMA}.price_history (
|
||||
);
|
||||
"""
|
||||
|
||||
# Idempotent constraint migrations for tables created before the UNIQUE clauses.
|
||||
# Deduplicates existing rows (keeps the most recent) before adding the constraint.
|
||||
# Deduplicate existing rows, then add the constraints to tables created before the UNIQUE clauses were introduced.
|
||||
_MIGRATE_SEARCH_CONSTRAINT = f"""
|
||||
DO $$
|
||||
BEGIN
|
||||
@@ -216,11 +199,6 @@ END $$;
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Assets
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@asset(
|
||||
group_name="funda",
|
||||
kinds={"python", "postgres"},
|
||||
@@ -234,7 +212,6 @@ def funda_search_results(
|
||||
) -> MaterializeResult:
|
||||
client = funda.get_client()
|
||||
|
||||
# Build search kwargs from launchpad config
|
||||
kwargs: dict = {
|
||||
"location": [loc.strip() for loc in config.location.split(",")],
|
||||
"offering_type": config.offering_type,
|
||||
@@ -259,7 +236,6 @@ def funda_search_results(
|
||||
if config.radius_km is not None:
|
||||
kwargs["radius_km"] = config.radius_km
|
||||
|
||||
# Paginate through results
|
||||
all_listings = []
|
||||
for page in range(config.max_pages):
|
||||
context.log.info(f"Fetching search page {page + 1}/{config.max_pages} …")
|
||||
@@ -275,7 +251,6 @@ def funda_search_results(
|
||||
context.log.warning("Search returned zero results.")
|
||||
return MaterializeResult(metadata={"count": 0})
|
||||
|
||||
# Write to Postgres
|
||||
engine = postgres.get_engine()
|
||||
with engine.begin() as conn:
|
||||
conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {_SCHEMA}"))
|
||||
@@ -389,7 +364,6 @@ def funda_listing_details(
|
||||
conn.execute(text(_DDL_DETAILS))
|
||||
conn.execute(text(_MIGRATE_DETAILS_CONSTRAINT))
|
||||
|
||||
# Read listing IDs from search results
|
||||
with engine.connect() as conn:
|
||||
result = conn.execute(
|
||||
text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.search_results")
|
||||
@@ -564,7 +538,6 @@ def funda_price_history(
|
||||
conn.execute(text(_DDL_PRICE_HISTORY))
|
||||
conn.execute(text(_MIGRATE_PRICE_HISTORY_CONSTRAINT))
|
||||
|
||||
# Read listings from details table
|
||||
with engine.connect() as conn:
|
||||
result = conn.execute(
|
||||
text(f"SELECT DISTINCT global_id FROM {_SCHEMA}.listing_details")
|
||||
@@ -583,7 +556,6 @@ def funda_price_history(
|
||||
errors = 0
|
||||
for i, gid in enumerate(ids):
|
||||
try:
|
||||
# get_price_history needs a Listing object, so fetch it first
|
||||
listing = client.get_listing(int(gid))
|
||||
history = client.get_price_history(listing)
|
||||
for entry in history:
|
||||
|
||||
@@ -10,10 +10,6 @@ from data_platform.assets.funda import (
|
||||
from data_platform.resources import FundaResource, PostgresResource
|
||||
from data_platform.schedules import funda_ingestion_job, funda_ingestion_schedule
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Definitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
defs = Definitions(
|
||||
assets=[
|
||||
dbt_project_assets,
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
"""Shared helper utilities for Dagster assets."""
|
||||
"""Shared helper utilities."""
|
||||
|
||||
import json
|
||||
|
||||
|
||||
def safe(val):
|
||||
"""Convert non-serialisable values (tuples, lists of dicts, etc.) for JSONB."""
|
||||
"""Convert non-serialisable values for JSONB storage."""
|
||||
if isinstance(val, list | dict | tuple):
|
||||
return json.dumps(val, default=str)
|
||||
return val
|
||||
@@ -28,15 +28,7 @@ def md_preview_table(
|
||||
columns: list[tuple[str, str]],
|
||||
formatters: dict[str, callable] | None = None,
|
||||
) -> str:
|
||||
"""Build a markdown table from a list of row dicts.
|
||||
|
||||
Args:
|
||||
rows: List of dictionaries containing row data.
|
||||
columns: List of (key, header_label) tuples defining the columns.
|
||||
formatters: Optional dict mapping column keys to formatting callables.
|
||||
Each callable receives the raw value and returns a display string.
|
||||
Columns without a formatter fall back to the raw value or "–".
|
||||
"""
|
||||
"""Build a markdown table from a list of row dicts."""
|
||||
formatters = formatters or {}
|
||||
headers = [label for _, label in columns]
|
||||
lines = [
|
||||
@@ -56,10 +48,10 @@ def md_preview_table(
|
||||
|
||||
|
||||
def format_euro(val) -> str:
|
||||
"""Format an integer as €-prefixed with thousands separators, or '–'."""
|
||||
"""Format an integer as €-prefixed, or '–'."""
|
||||
return f"€{val:,}" if val else "–"
|
||||
|
||||
|
||||
def format_area(val) -> str:
|
||||
"""Format an integer as m² area, or '–'."""
|
||||
"""Format an integer as m², or '–'."""
|
||||
return f"{val} m²" if val else "–"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Shared Dagster resources for the data platform."""
|
||||
"""Dagster resources."""
|
||||
|
||||
import os
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Dagster jobs and schedules for the data platform."""
|
||||
"""Dagster jobs and schedules."""
|
||||
|
||||
from dagster import (
|
||||
AssetSelection,
|
||||
@@ -13,10 +13,6 @@ from data_platform.assets.funda import (
|
||||
FundaSearchConfig,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Jobs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
funda_ingestion_job = define_asset_job(
|
||||
name="funda_ingestion",
|
||||
selection=AssetSelection.assets(
|
||||
@@ -24,17 +20,13 @@ funda_ingestion_job = define_asset_job(
|
||||
"funda_listing_details",
|
||||
"funda_price_history",
|
||||
),
|
||||
description="Run the full Funda ingestion pipeline (search → details → price history).",
|
||||
description="Full Funda ingestion pipeline.",
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schedules
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
funda_ingestion_schedule = ScheduleDefinition(
|
||||
name="funda_ingestion_schedule",
|
||||
job=funda_ingestion_job,
|
||||
cron_schedule="0 */4 * * *", # every 4 hours
|
||||
cron_schedule="0 */4 * * *",
|
||||
run_config=RunConfig(
|
||||
ops={
|
||||
"funda_search_results": FundaSearchConfig(),
|
||||
|
||||
@@ -4,22 +4,15 @@ from unittest.mock import MagicMock
|
||||
|
||||
|
||||
def make_mock_engine(select_rows: list[tuple] | None = None):
|
||||
"""Return a mock SQLAlchemy engine.
|
||||
|
||||
Args:
|
||||
select_rows: Rows to return from *connect()* (SELECT queries).
|
||||
Defaults to an empty list.
|
||||
"""
|
||||
"""Return a mock SQLAlchemy engine."""
|
||||
select_rows = select_rows or []
|
||||
|
||||
engine = MagicMock()
|
||||
|
||||
# engine.begin() context manager → conn for DDL / writes
|
||||
write_conn = MagicMock()
|
||||
engine.begin.return_value.__enter__ = MagicMock(return_value=write_conn)
|
||||
engine.begin.return_value.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
# engine.connect() context manager → conn for SELECTs
|
||||
read_conn = MagicMock()
|
||||
read_conn.execute.return_value = iter(select_rows)
|
||||
engine.connect.return_value.__enter__ = MagicMock(return_value=read_conn)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for Funda Dagster assets using mocked external dependencies."""
|
||||
"""Tests for Funda assets."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
@@ -12,13 +12,9 @@ from data_platform.assets.funda import (
|
||||
)
|
||||
from tests.conftest import make_mock_engine, make_mock_listing
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Duck-typed mock resources (bypass frozen-Pydantic ConfigurableResource)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class MockFundaResource:
|
||||
"""Minimal test double for FundaResource."""
|
||||
"""Test double for FundaResource."""
|
||||
|
||||
def __init__(self, client):
|
||||
self._client = client
|
||||
@@ -28,7 +24,7 @@ class MockFundaResource:
|
||||
|
||||
|
||||
class MockPostgresResource:
|
||||
"""Minimal test double for PostgresResource."""
|
||||
"""Test double for PostgresResource."""
|
||||
|
||||
def __init__(self, engine=None, inserted_rows: list | None = None):
|
||||
self._engine = engine or make_mock_engine()[0]
|
||||
@@ -44,10 +40,6 @@ class MockPostgresResource:
|
||||
self._inserted_rows.extend(rows)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared listing data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SEARCH_LISTING_DATA = {
|
||||
"global_id": "1234567",
|
||||
"title": "Teststraat 1",
|
||||
@@ -95,11 +87,6 @@ _DETAIL_LISTING_DATA = {
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# funda_search_results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFundaSearchResults:
|
||||
def _run(self, mock_client, inserted_rows=None, config=None):
|
||||
engine, _, _ = make_mock_engine()
|
||||
@@ -204,11 +191,6 @@ class TestFundaSearchResults:
|
||||
assert client.search_listing.call_args[1]["energy_label"] == ["A", "A+"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# funda_listing_details
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFundaListingDetails:
|
||||
def _run(self, mock_client, engine, inserted_rows=None):
|
||||
rows = inserted_rows if inserted_rows is not None else []
|
||||
@@ -256,11 +238,6 @@ class TestFundaListingDetails:
|
||||
assert len(inserted) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# funda_price_history
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFundaPriceHistory:
|
||||
def _run(self, mock_client, engine, inserted_rows=None):
|
||||
rows = inserted_rows if inserted_rows is not None else []
|
||||
@@ -312,11 +289,6 @@ class TestFundaPriceHistory:
|
||||
assert mat[0].metadata["count"].value == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FundaSearchConfig
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFundaSearchConfig:
|
||||
def test_defaults(self):
|
||||
cfg = FundaSearchConfig()
|
||||
@@ -337,8 +309,3 @@ class TestFundaSearchConfig:
|
||||
assert cfg.location == "rotterdam"
|
||||
assert cfg.offering_type == "rent"
|
||||
assert cfg.price_max == 2000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for pure helper functions in data_platform.assets.helpers."""
|
||||
"""Tests for helper functions."""
|
||||
|
||||
from data_platform.helpers import (
|
||||
format_area,
|
||||
@@ -8,8 +8,6 @@ from data_platform.helpers import (
|
||||
safe_int,
|
||||
)
|
||||
|
||||
# ── safe_int ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSafeInt:
|
||||
def test_none_returns_none(self):
|
||||
@@ -43,9 +41,6 @@ class TestSafeInt:
|
||||
assert safe_int([1, 2, 3]) is None
|
||||
|
||||
|
||||
# ── safe ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSafe:
|
||||
def test_dict_becomes_json_string(self):
|
||||
result = safe({"key": "val"})
|
||||
@@ -76,9 +71,6 @@ class TestSafe:
|
||||
assert json.loads(result) == data
|
||||
|
||||
|
||||
# ── format_euro ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestFormatEuro:
|
||||
def test_formats_price(self):
|
||||
assert format_euro(350000) == "€350,000"
|
||||
@@ -90,9 +82,6 @@ class TestFormatEuro:
|
||||
assert format_euro(0) == "–"
|
||||
|
||||
|
||||
# ── format_area ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestFormatArea:
|
||||
def test_formats_area(self):
|
||||
assert format_area(80) == "80 m²"
|
||||
@@ -104,9 +93,6 @@ class TestFormatArea:
|
||||
assert format_area(0) == "–"
|
||||
|
||||
|
||||
# ── md_preview_table ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestMdPreviewTable:
|
||||
def test_empty_rows_returns_header_only(self):
|
||||
result = md_preview_table([], columns=[("title", "Title"), ("city", "City")])
|
||||
|
||||
@@ -4,8 +4,6 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
from data_platform.resources import FundaResource, PostgresResource
|
||||
|
||||
# ── FundaResource ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestFundaResource:
|
||||
def test_get_client_returns_funda_instance(self):
|
||||
@@ -24,9 +22,6 @@ class TestFundaResource:
|
||||
assert resource.timeout == 60
|
||||
|
||||
|
||||
# ── PostgresResource ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPostgresResource:
|
||||
def _make_resource(self, **kwargs):
|
||||
defaults = {
|
||||
@@ -60,7 +55,6 @@ class TestPostgresResource:
|
||||
assert call_url.startswith("postgresql://")
|
||||
|
||||
def test_execute_calls_engine_begin(self):
|
||||
"""execute() wraps its statement in engine.begin()."""
|
||||
mock_engine = MagicMock()
|
||||
mock_conn = MagicMock()
|
||||
mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn)
|
||||
@@ -74,7 +68,6 @@ class TestPostgresResource:
|
||||
mock_conn.execute.assert_called_once()
|
||||
|
||||
def test_execute_many_calls_engine_begin(self):
|
||||
"""execute_many() wraps its statement in engine.begin()."""
|
||||
mock_engine = MagicMock()
|
||||
mock_conn = MagicMock()
|
||||
mock_engine.begin.return_value.__enter__ = MagicMock(return_value=mock_conn)
|
||||
|
||||
Reference in New Issue
Block a user