feat: implement linting and testing

This commit is contained in:
Stijnvandenbroek
2026-03-03 22:02:25 +00:00
parent 8dd6a7b890
commit fc43570506
16 changed files with 884 additions and 56 deletions

0
tests/__init__.py Normal file
View File

35
tests/conftest.py Normal file
View File

@@ -0,0 +1,35 @@
"""Shared test fixtures."""
from unittest.mock import MagicMock
def make_mock_engine(select_rows: list[tuple] | None = None):
"""Return a mock SQLAlchemy engine.
Args:
select_rows: Rows to return from *connect()* (SELECT queries).
Defaults to an empty list.
"""
select_rows = select_rows or []
engine = MagicMock()
# engine.begin() context manager → conn for DDL / writes
write_conn = MagicMock()
engine.begin.return_value.__enter__ = MagicMock(return_value=write_conn)
engine.begin.return_value.__exit__ = MagicMock(return_value=False)
# engine.connect() context manager → conn for SELECTs
read_conn = MagicMock()
read_conn.execute.return_value = iter(select_rows)
engine.connect.return_value.__enter__ = MagicMock(return_value=read_conn)
engine.connect.return_value.__exit__ = MagicMock(return_value=False)
return engine, write_conn, read_conn
def make_mock_listing(data: dict):
    """Build a stand-in for a pyfunda listing whose ``to_dict()`` returns *data*."""
    # Configure the nested return value directly through the constructor.
    return MagicMock(**{"to_dict.return_value": data})

336
tests/test_assets_funda.py Normal file
View File

@@ -0,0 +1,336 @@
"""Tests for Funda Dagster assets using mocked external dependencies."""
from unittest.mock import MagicMock
from dagster import materialize
from data_platform.assets.funda import (
FundaSearchConfig,
funda_listing_details,
funda_price_history,
funda_search_results,
)
from tests.conftest import make_mock_engine, make_mock_listing
# ---------------------------------------------------------------------------
# Duck-typed mock resources (bypass frozen-Pydantic ConfigurableResource)
# ---------------------------------------------------------------------------
class MockFundaResource:
    """Test double for FundaResource that hands back a pre-built client."""

    def __init__(self, client):
        # Keep the exact object the test supplied so it can be inspected later.
        self._client = client

    def get_client(self):
        """Return the client given at construction time."""
        client = self._client
        return client
class MockPostgresResource:
"""Minimal test double for PostgresResource."""
def __init__(self, engine=None, inserted_rows: list | None = None):
self._engine = engine or make_mock_engine()[0]
self._inserted_rows = inserted_rows if inserted_rows is not None else []
def get_engine(self):
return self._engine
def execute(self, statement, params=None):
pass
def execute_many(self, statement, rows):
self._inserted_rows.extend(rows)
# ---------------------------------------------------------------------------
# Shared listing data
# ---------------------------------------------------------------------------
_SEARCH_LISTING_DATA = {
"global_id": "1234567",
"title": "Teststraat 1",
"city": "Amsterdam",
"postcode": "1234AB",
"province": "Noord-Holland",
"neighbourhood": "Centrum",
"price": 350000,
"living_area": 80,
"plot_area": None,
"bedrooms": 3,
"rooms": 5,
"energy_label": "A",
"object_type": "apartment",
"offering_type": "buy",
"construction_type": "existing",
"publish_date": "2026-01-15",
"broker_id": "999",
"broker_name": "Test Makelaars",
}
_DETAIL_LISTING_DATA = {
**_SEARCH_LISTING_DATA,
"tiny_id": "87654321",
"municipality": "Amsterdam",
"price_formatted": "\u20ac 350.000 k.k.",
"status": "available",
"house_type": "Appartement",
"construction_year": "1985",
"description": "A lovely apartment.",
"publication_date": "2026-01-15",
"latitude": 52.37,
"longitude": 4.89,
"has_garden": False,
"has_balcony": True,
"has_solar_panels": False,
"has_heat_pump": False,
"has_roof_terrace": False,
"is_energy_efficient": True,
"is_monument": False,
"url": "https://www.funda.nl/detail/koop/amsterdam/app/87654321/",
"photo_count": 12,
"views": 150,
"saves": 30,
}
# ---------------------------------------------------------------------------
# funda_search_results
# ---------------------------------------------------------------------------
class TestFundaSearchResults:
    """Materialize ``funda_search_results`` against mocked Funda and Postgres resources."""

    def _run(self, mock_client, inserted_rows=None, config=None):
        """Materialize the asset once and return the Dagster result.

        ``max_pages`` defaults to 1; any entry in *config* overrides it.
        Rows written through ``execute_many`` end up in *inserted_rows*.
        """
        engine = make_mock_engine()[0]
        sink = [] if inserted_rows is None else inserted_rows
        asset_config = {"max_pages": 1}
        asset_config.update(config or {})
        return materialize(
            [funda_search_results],
            resources={
                "funda": MockFundaResource(mock_client),
                "postgres": MockPostgresResource(engine, sink),
            },
            run_config={"ops": {"funda_search_results": {"config": asset_config}}},
        )

    def _search_kwargs(self, config):
        """Run with an empty result page and return the kwargs of the last search call."""
        client = MagicMock()
        client.search_listing.return_value = []
        self._run(client, config=config)
        return client.search_listing.call_args.kwargs

    def test_no_results_returns_count_zero(self):
        client = MagicMock()
        client.search_listing.return_value = []

        outcome = self._run(client)

        assert outcome.success
        materializations = outcome.asset_materializations_for_node("funda_search_results")
        assert materializations[0].metadata["count"].value == 0

    def test_results_are_inserted(self):
        client = MagicMock()
        client.search_listing.return_value = [make_mock_listing(_SEARCH_LISTING_DATA)]
        captured = []

        outcome = self._run(client, inserted_rows=captured)

        assert outcome.success
        assert len(captured) == 1
        first = captured[0]
        assert first["city"] == "Amsterdam"
        assert first["price"] == 350000

    def test_pagination_stops_on_empty_page(self):
        # One populated page followed by an empty one; a third page is allowed
        # by max_pages but must never be requested.
        client = MagicMock()
        client.search_listing.side_effect = [
            [make_mock_listing(_SEARCH_LISTING_DATA)],
            [],
        ]
        captured = []

        outcome = self._run(client, inserted_rows=captured, config={"max_pages": 3})

        assert outcome.success
        assert client.search_listing.call_count == 2
        assert len(captured) == 1

    def test_location_split_by_comma(self):
        kwargs = self._search_kwargs({"location": "amsterdam, rotterdam"})
        assert kwargs["location"] == ["amsterdam", "rotterdam"]

    def test_price_max_forwarded(self):
        kwargs = self._search_kwargs({"price_max": 500000})
        assert kwargs["price_max"] == 500000

    def test_price_min_forwarded(self):
        kwargs = self._search_kwargs({"price_min": 200000})
        assert kwargs["price_min"] == 200000

    def test_area_min_forwarded(self):
        kwargs = self._search_kwargs({"area_min": 50})
        assert kwargs["area_min"] == 50

    def test_radius_km_forwarded(self):
        kwargs = self._search_kwargs({"location": "1012AB", "radius_km": 10})
        assert kwargs["radius_km"] == 10

    def test_object_type_split_by_comma(self):
        kwargs = self._search_kwargs({"object_type": "house, apartment"})
        assert kwargs["object_type"] == ["house", "apartment"]

    def test_energy_label_split_by_comma(self):
        kwargs = self._search_kwargs({"energy_label": "A, A+"})
        assert kwargs["energy_label"] == ["A", "A+"]
# ---------------------------------------------------------------------------
# funda_listing_details
# ---------------------------------------------------------------------------
class TestFundaListingDetails:
    """Materialize ``funda_listing_details`` against mocked Funda and Postgres resources."""

    def _run(self, mock_client, engine, inserted_rows=None):
        """Materialize the asset with *engine* and return the Dagster result."""
        sink = [] if inserted_rows is None else inserted_rows
        resources = {
            "funda": MockFundaResource(mock_client),
            "postgres": MockPostgresResource(engine, sink),
        }
        return materialize([funda_listing_details], resources=resources)

    def test_no_search_results_returns_count_zero(self):
        engine = make_mock_engine(select_rows=[])[0]

        outcome = self._run(MagicMock(), engine)

        assert outcome.success
        materializations = outcome.asset_materializations_for_node("funda_listing_details")
        assert materializations[0].metadata["count"].value == 0

    def test_details_fetched_and_inserted(self):
        engine = make_mock_engine(select_rows=[("1234567",)])[0]
        client = MagicMock()
        client.get_listing.return_value = make_mock_listing(_DETAIL_LISTING_DATA)
        captured = []

        outcome = self._run(client, engine, captured)

        assert outcome.success
        assert len(captured) == 1
        row = captured[0]
        assert row["city"] == "Amsterdam"
        assert row["status"] == "available"
        assert row["has_balcony"] is True
        assert row["has_garden"] is False

    def test_failed_fetch_counted_as_error(self):
        # Two ids are selected; the first fetch succeeds and the second raises.
        engine = make_mock_engine(select_rows=[("1234567",), ("9999999",)])[0]
        client = MagicMock()
        client.get_listing.side_effect = [
            make_mock_listing(_DETAIL_LISTING_DATA),
            RuntimeError("API error"),
        ]
        captured = []

        outcome = self._run(client, engine, captured)

        assert outcome.success
        materializations = outcome.asset_materializations_for_node("funda_listing_details")
        assert materializations[0].metadata["errors"].value == 1
        assert len(captured) == 1
# ---------------------------------------------------------------------------
# funda_price_history
# ---------------------------------------------------------------------------
class TestFundaPriceHistory:
    """Materialize ``funda_price_history`` against mocked Funda and Postgres resources."""

    def _run(self, mock_client, engine, inserted_rows=None):
        """Materialize the asset with *engine* and return the Dagster result."""
        sink = [] if inserted_rows is None else inserted_rows
        resources = {
            "funda": MockFundaResource(mock_client),
            "postgres": MockPostgresResource(engine, sink),
        }
        return materialize([funda_price_history], resources=resources)

    def test_no_details_returns_count_zero(self):
        engine = make_mock_engine(select_rows=[])[0]

        outcome = self._run(MagicMock(), engine)

        assert outcome.success
        materializations = outcome.asset_materializations_for_node("funda_price_history")
        assert materializations[0].metadata["count"].value == 0

    def test_price_history_inserted(self):
        asking_entry = {
            "price": 350000,
            "human_price": "\u20ac350.000",
            "date": "1 jan, 2026",
            "timestamp": "2026-01-01T00:00:00",
            "source": "Funda",
            "status": "asking_price",
        }
        woz_entry = {
            "price": 320000,
            "human_price": "\u20ac320.000",
            "date": "1 jan, 2024",
            "timestamp": "2024-01-01T00:00:00",
            "source": "WOZ",
            "status": "woz",
        }
        engine = make_mock_engine(select_rows=[("1234567",)])[0]
        client = MagicMock()
        client.get_listing.return_value = make_mock_listing(_DETAIL_LISTING_DATA)
        client.get_price_history.return_value = [asking_entry, woz_entry]
        captured = []

        outcome = self._run(client, engine, captured)

        assert outcome.success
        assert len(captured) == 2
        # Entries are expected in the order the client returned them.
        assert captured[0]["source"] == "Funda"
        assert captured[1]["source"] == "WOZ"
        materializations = outcome.asset_materializations_for_node("funda_price_history")
        assert materializations[0].metadata["count"].value == 2
# ---------------------------------------------------------------------------
# FundaSearchConfig
# ---------------------------------------------------------------------------
class TestFundaSearchConfig:
    """Tests for the default and overridden field values of FundaSearchConfig."""

    def test_defaults(self):
        """A config built without arguments exposes the expected defaults."""
        cfg = FundaSearchConfig()
        assert cfg.location == "amsterdam"
        assert cfg.offering_type == "buy"
        assert cfg.sort == "newest"
        assert cfg.max_pages == 3
        assert cfg.price_min is None

    def test_custom_values(self):
        """Every value passed to the constructor is readable back unchanged."""
        cfg = FundaSearchConfig(
            location="rotterdam",
            offering_type="rent",
            price_max=2000,
            max_pages=1,
        )
        assert cfg.location == "rotterdam"
        assert cfg.offering_type == "rent"
        assert cfg.price_max == 2000
        # max_pages was overridden above, so check it like the other fields.
        assert cfg.max_pages == 1
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

154
tests/test_helpers.py Normal file
View File

@@ -0,0 +1,154 @@
"""Tests for pure helper functions in data_platform.assets.funda."""
from data_platform.assets.funda import (
_details_preview_table,
_safe,
_safe_int,
_search_preview_table,
)
# ── _safe_int ───────────────────────────────────────────────────────────────
class TestSafeInt:
    """Tests for ``_safe_int``: values that convert to int and values that yield None."""

    def test_none_returns_none(self):
        converted = _safe_int(None)
        assert converted is None

    def test_integer_passthrough(self):
        converted = _safe_int(42)
        assert converted == 42

    def test_negative_integer(self):
        converted = _safe_int(-10)
        assert converted == -10

    def test_zero(self):
        converted = _safe_int(0)
        assert converted == 0

    def test_string_int(self):
        converted = _safe_int("123")
        assert converted == 123

    def test_float_truncated(self):
        # The fractional part is dropped, not rounded.
        converted = _safe_int(3.9)
        assert converted == 3

    def test_float_string(self):
        converted = _safe_int("7.0")
        assert converted == 7

    def test_non_numeric_string_returns_none(self):
        converted = _safe_int("abc")
        assert converted is None

    def test_empty_string_returns_none(self):
        converted = _safe_int("")
        assert converted is None

    def test_list_returns_none(self):
        converted = _safe_int([1, 2, 3])
        assert converted is None
# ── _safe ────────────────────────────────────────────────────────────────────
class TestSafe:
    """Tests for ``_safe``: containers become JSON strings, scalars pass through."""

    def test_dict_becomes_json_string(self):
        assert _safe({"key": "val"}) == '{"key": "val"}'

    def test_list_becomes_json_string(self):
        assert _safe([1, 2, 3]) == "[1, 2, 3]"

    def test_tuple_becomes_json_string(self):
        # A tuple is expected to serialise as a JSON array.
        assert _safe((1, 2)) == "[1, 2]"

    def test_string_passthrough(self):
        value = _safe("hello")
        assert value == "hello"

    def test_integer_passthrough(self):
        value = _safe(99)
        assert value == 99

    def test_none_passthrough(self):
        value = _safe(None)
        assert value is None

    def test_nested_dict_serialised(self):
        import json

        payload = {"a": {"b": [1, 2]}}
        serialised = _safe(payload)
        # Round-trip through JSON instead of pinning the exact string layout.
        assert json.loads(serialised) == payload
# ── _search_preview_table ────────────────────────────────────────────────────
class TestSearchPreviewTable:
    """Tests for the preview table rendered from search-result rows.

    The table is a newline-separated string: a header line, a separator line
    containing ``---``, then one line per row.
    """

    def test_empty_rows_returns_header_only(self):
        result = _search_preview_table([])
        lines = result.split("\n")
        assert len(lines) == 2
        assert "Title" in lines[0]
        assert "---" in lines[1]

    def test_single_row_appears(self):
        rows = [
            {
                "title": "Teststraat 1",
                "city": "Amsterdam",
                "price": 350000,
                "living_area": 80,
                "bedrooms": 3,
            }
        ]
        result = _search_preview_table(rows)
        assert "Teststraat 1" in result
        assert "Amsterdam" in result
        assert "€350,000" in result
        assert "80 m²" in result
        assert "3" in result

    def test_missing_price_shows_dash(self):
        rows = [{"title": "No Price", "city": "Rotterdam", "price": None}]
        result = _search_preview_table(rows)
        # Lines 0 and 1 are the header and separator, so line 2 is the data row.
        data_row = result.split("\n")[2]
        assert "No Price" in data_row
        # The previous check (`"" in result`) was always true. The exact
        # placeholder glyph is not known here, so assert instead that no euro
        # amount is rendered for a missing price.
        # NOTE(review): tighten to the concrete placeholder once confirmed.
        assert "€" not in data_row

    def test_missing_area_shows_dash(self):
        rows = [{"title": "No Area", "city": "Utrecht", "living_area": None}]
        result = _search_preview_table(rows)
        data_row = result.split("\n")[2]
        assert "No Area" in data_row
        # Same reasoning as the price case: a missing area must not be
        # rendered with the square-metre unit.
        assert "m²" not in data_row

    def test_multiple_rows_correct_count(self):
        rows = [{"title": f"St {i}", "city": "City", "price": i * 1000} for i in range(5)]
        result = _search_preview_table(rows)
        lines = result.split("\n")
        # header + separator + 5 data rows
        assert len(lines) == 7
# ── _details_preview_table ───────────────────────────────────────────────────
class TestDetailsPreviewTable:
    """Tests for the preview table rendered from listing-detail rows.

    The table is a newline-separated string whose first line is the header;
    an empty input yields exactly two lines.
    """

    def test_empty_rows_returns_header_only(self):
        result = _details_preview_table([])
        lines = result.split("\n")
        assert len(lines) == 2
        assert "Title" in lines[0]

    def test_row_with_all_fields(self):
        rows = [
            {
                "title": "Kerkstraat 5",
                "city": "Haarlem",
                "price": 425000,
                "status": "available",
                "energy_label": "A",
            }
        ]
        result = _details_preview_table(rows)
        assert "Kerkstraat 5" in result
        assert "Haarlem" in result
        assert "€425,000" in result
        assert "available" in result
        assert "A" in result

    def test_missing_price_shows_dash(self):
        rows = [{"title": "T", "city": "C", "price": None, "status": "sold"}]
        result = _details_preview_table(rows)
        # With two non-data lines for an empty table, line 2 is the data row.
        data_row = result.split("\n")[2]
        # The previous check (`"" in result`) was always true. The exact
        # placeholder glyph is not known here, so assert instead that no euro
        # amount is rendered for a missing price.
        # NOTE(review): tighten to the concrete placeholder once confirmed.
        assert "€" not in data_row

88
tests/test_resources.py Normal file
View File

@@ -0,0 +1,88 @@
"""Tests for data_platform.resources."""
from unittest.mock import MagicMock, patch
from data_platform.resources import FundaResource, PostgresResource
# ── FundaResource ─────────────────────────────────────────────────────────────
class TestFundaResource:
    """Tests for FundaResource: client construction and the timeout field."""

    def test_get_client_returns_funda_instance(self):
        from funda import Funda

        client = FundaResource(timeout=10).get_client()
        assert isinstance(client, Funda)

    def test_default_timeout(self):
        # No arguments: the resource falls back to its built-in timeout.
        assert FundaResource().timeout == 30

    def test_custom_timeout(self):
        assert FundaResource(timeout=60).timeout == 60
# ── PostgresResource ──────────────────────────────────────────────────────────
class TestPostgresResource:
    """Tests for PostgresResource with ``create_engine`` patched out."""

    def _make_resource(self, **kwargs):
        """Build a resource from fixed connection settings, overridden by *kwargs*."""
        settings = {
            "host": "testhost",
            "port": 5432,
            "user": "user",
            "password": "pw",
            "dbname": "db",
        }
        settings.update(kwargs)
        return PostgresResource(**settings)

    @staticmethod
    def _engine_with_write_conn():
        """Return ``(engine, conn)`` where ``with engine.begin()`` yields *conn*."""
        engine = MagicMock()
        conn = MagicMock()
        transaction = engine.begin.return_value
        transaction.__enter__ = MagicMock(return_value=conn)
        transaction.__exit__ = MagicMock(return_value=False)
        return engine, conn

    def test_connection_url_format(self):
        res = self._make_resource()
        # Patch at the module level so the frozen instance isn't mutated
        with patch("data_platform.resources.create_engine", return_value=MagicMock()) as mock_create:
            res.get_engine()
        call_url = mock_create.call_args.args[0]
        for fragment in ("testhost", "5432", "user", "pw", "db"):
            assert fragment in call_url

    def test_connection_url_scheme(self):
        res = self._make_resource()
        with patch("data_platform.resources.create_engine", return_value=MagicMock()) as mock_create:
            res.get_engine()
        call_url = mock_create.call_args.args[0]
        assert call_url.startswith("postgresql://")

    def test_execute_calls_engine_begin(self):
        """execute() wraps its statement in engine.begin()."""
        mock_engine, mock_conn = self._engine_with_write_conn()
        # Patch create_engine at module level so that get_engine() returns our mock
        with patch("data_platform.resources.create_engine", return_value=mock_engine):
            res = self._make_resource()
            res.execute("SELECT 1")
        mock_conn.execute.assert_called_once()

    def test_execute_many_calls_engine_begin(self):
        """execute_many() wraps its statement in engine.begin()."""
        mock_engine, mock_conn = self._engine_with_write_conn()
        with patch("data_platform.resources.create_engine", return_value=mock_engine):
            res = self._make_resource()
            rows = [{"id": 1}, {"id": 2}]
            res.execute_many("INSERT INTO t VALUES (:id)", rows)
        mock_conn.execute.assert_called_once()