feat: optimize price history ingestion
This commit is contained in:
@@ -15,6 +15,7 @@ ref = "{% macro ref(model_name) %}{{ model_name }}{% endmacro %}"
|
|||||||
|
|
||||||
[sqlfluff:templater:jinja:context]
|
[sqlfluff:templater:jinja:context]
|
||||||
schema = raw_funda
|
schema = raw_funda
|
||||||
|
staleness_days = 7
|
||||||
|
|
||||||
[sqlfluff:indentation]
|
[sqlfluff:indentation]
|
||||||
indent_unit = space
|
indent_unit = space
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ class FundaPriceHistoryConfig(Config):
|
|||||||
"""Config for price history fetch."""
|
"""Config for price history fetch."""
|
||||||
|
|
||||||
fetch_all: bool = False
|
fetch_all: bool = False
|
||||||
|
staleness_days: int = 7
|
||||||
|
|
||||||
|
|
||||||
@asset(
|
@asset(
|
||||||
@@ -371,6 +372,7 @@ def raw_funda_price_history(
|
|||||||
_SQL_DIR,
|
_SQL_DIR,
|
||||||
"dml/select_new_price_history_listings.sql",
|
"dml/select_new_price_history_listings.sql",
|
||||||
schema=_SCHEMA,
|
schema=_SCHEMA,
|
||||||
|
staleness_days=config.staleness_days,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
result = conn.execute(query)
|
result = conn.execute(query)
|
||||||
|
|||||||
@@ -1,17 +1,24 @@
|
|||||||
|
with last_price_history as (
|
||||||
|
select
|
||||||
|
global_id,
|
||||||
|
max(ingested_at) as last_ingested
|
||||||
|
from {{ schema }}.price_history
|
||||||
|
group by global_id
|
||||||
|
)
|
||||||
|
|
||||||
select distinct
|
select distinct
|
||||||
d.global_id,
|
d.global_id,
|
||||||
d.url,
|
d.url,
|
||||||
d.title,
|
d.title,
|
||||||
d.postcode
|
d.postcode
|
||||||
from {{ schema }}.listing_details as d
|
from {{ schema }}.listing_details as d
|
||||||
inner join {{ schema }}.search_results as s on d.global_id = s.global_id
|
inner join {{ schema }}.search_results as s
|
||||||
where s.is_active = true
|
on d.global_id = s.global_id
|
||||||
union
|
left join last_price_history as ph
|
||||||
select distinct
|
on d.global_id = ph.global_id
|
||||||
d.global_id,
|
where
|
||||||
d.url,
|
s.is_active = true
|
||||||
d.title,
|
and (
|
||||||
d.postcode
|
ph.last_ingested is null
|
||||||
from {{ schema }}.listing_details as d
|
or ph.last_ingested < now() - interval '{{ staleness_days }} days'
|
||||||
left join {{ schema }}.price_history as p on d.global_id = p.global_id
|
)
|
||||||
where p.global_id is null
|
|
||||||
|
|||||||
Reference in New Issue
Block a user