feat: optimize price history ingestion

This commit is contained in:
Stijnvandenbroek
2026-03-07 21:25:06 +00:00
parent 61abb37ef4
commit fea062fbaa
3 changed files with 21 additions and 11 deletions

View File

@@ -15,6 +15,7 @@ ref = "{% macro ref(model_name) %}{{ model_name }}{% endmacro %}"
[sqlfluff:templater:jinja:context]
# Values the sqlfluff Jinja templater substitutes for template variables when
# it renders the templated SQL files.
schema = raw_funda
# Value for the `staleness_days` template variable, which the price-history
# listing query places inside an `interval '<n> days'` literal.
staleness_days = 7
[sqlfluff:indentation]
indent_unit = space

View File

@@ -55,6 +55,7 @@ class FundaPriceHistoryConfig(Config):
"""Config for price history fetch."""
# NOTE(review): presumably switches between fetching price history for every
# listing and only for the new/stale ones; the code that reads this flag is
# not visible here, so confirm against the asset body.
fetch_all: bool = False
# Forwarded by raw_funda_price_history to the listing-selection SQL template
# as `staleness_days`. There it is the age threshold, in days, applied to a
# listing's most recent price_history.ingested_at. Defaults to 7.
staleness_days: int = 7
@asset(
@@ -371,6 +372,7 @@ def raw_funda_price_history(
_SQL_DIR,
"dml/select_new_price_history_listings.sql",
schema=_SCHEMA,
staleness_days=config.staleness_days,
)
)
result = conn.execute(query)

View File

@@ -1,17 +1,24 @@
-- Selects the listings (global_id, url, title, postcode) whose price history
-- should be fetched.
-- NOTE(review): this text looks like a rendered diff whose +/- markers were
-- lost, so removed and added lines are interleaved and the statement is not
-- valid SQL as it stands. The notes below mark the apparent old and new
-- parts; confirm against the repository before relying on them.

-- Most recent ingested_at recorded in price_history for each global_id.
with last_price_history as (
select
global_id,
max(ingested_at) as last_ingested
from {{ schema }}.price_history
group by global_id
)
select distinct
d.global_id,
d.url,
d.title,
d.postcode
from {{ schema }}.listing_details as d
-- NOTE(review): from here through `where p.global_id is null` appears to be
-- the previous version of the query: every active listing, UNION every
-- listing that has no price_history row at all.
inner join {{ schema }}.search_results as s on d.global_id = s.global_id
where s.is_active = true
union
select distinct
d.global_id,
d.url,
d.title,
d.postcode
from {{ schema }}.listing_details as d
left join {{ schema }}.price_history as p on d.global_id = p.global_id
where p.global_id is null
-- NOTE(review): the remaining lines appear to be the replacement tail that
-- follows the first `from ... listing_details as d` above: active listings
-- only, left-joined to the CTE so listings without history are kept.
inner join {{ schema }}.search_results as s
on d.global_id = s.global_id
left join last_price_history as ph
on d.global_id = ph.global_id
where
s.is_active = true
and (
-- No price history has been ingested for this listing yet.
ph.last_ingested is null
-- Or the newest ingestion is older than the staleness_days window.
or ph.last_ingested < now() - interval '{{ staleness_days }} days'
)