feat: revise funda data process

This commit is contained in:
Stijnvandenbroek
2026-03-05 19:12:33 +00:00
parent ef0cddaa22
commit b959049fe8
11 changed files with 68 additions and 135 deletions

View File

@@ -1,29 +0,0 @@
-- Mart: per-city price statistics for available listings.
-- Produces one row per (city, province, offering_type, object_type) group.
with available_listings as (
    -- Keep only listings that are not sold and that carry a price;
    -- project just the columns the aggregation below needs.
    select
        city,
        province,
        offering_type,
        object_type,
        current_price,
        price_per_sqm,
        living_area,
        bedrooms
    from {{ ref('funda_listings') }}
    where
        not is_sold
        and current_price is not null
),

stats_by_city as (
    select
        city,
        province,
        offering_type,
        object_type,
        -- Volume of listings in the group.
        count(*) as listing_count,
        -- Price distribution.
        round(avg(current_price), 0) as avg_price,
        min(current_price) as min_price,
        max(current_price) as max_price,
        percentile_cont(0.5) within group (order by current_price) as median_price,
        -- Averages of size-related attributes (nulls are ignored by avg).
        round(avg(price_per_sqm), 0) as avg_price_per_sqm,
        round(avg(living_area), 0) as avg_living_area,
        round(avg(bedrooms), 1) as avg_bedrooms
    from available_listings
    group by
        city,
        province,
        offering_type,
        object_type
)

select * from stats_by_city

View File

@@ -1,64 +0,0 @@
# dbt schema file for the funda_city_stats model.
# Declares the model description, an enforced contract, Dagster group
# metadata, and the data type / constraints / tests for every output column.
version: 2
models:
- name: funda_city_stats
description: >
Aggregated price statistics per city, province, offering type and object type. Only includes
currently available (not sold) listings.
# With the contract enforced, every column below declares its data_type.
config:
contract:
enforced: true
# Assigns the model to the "funda" group on the Dagster side.
meta:
dagster:
group: funda
# The first four columns are the grouping keys named in the description;
# the remaining columns are aggregate measures computed per group.
columns:
- name: city
description: City name.
data_type: text
constraints:
- type: not_null
tests:
- not_null
- name: province
description: Province name.
data_type: text
- name: offering_type
description: Buy or rent.
data_type: text
constraints:
- type: not_null
tests:
- not_null
- name: object_type
description: Property type.
data_type: text
- name: listing_count
description: Number of active listings in this group.
data_type: bigint
constraints:
- type: not_null
tests:
- not_null
# Every group must contain at least one listing (listing_count > 0).
- dbt_utils.expression_is_true:
expression: "> 0"
- name: avg_price
description: Average asking price.
data_type: numeric
- name: min_price
description: Lowest asking price in this group.
data_type: bigint
- name: max_price
description: Highest asking price in this group.
data_type: bigint
- name: median_price
description: Median asking price.
data_type: double precision
- name: avg_price_per_sqm
description: Average price per square metre.
data_type: numeric
- name: avg_living_area
description: Average living area in m².
data_type: numeric
- name: avg_bedrooms
description: Average number of bedrooms.
data_type: numeric

View File

@@ -1,8 +1,20 @@
-- Mart: analysis-ready Funda listings table.
-- Selects the most useful fields and adds derived metrics.
-- Incrementally loads enriched listings, updating existing rows on re-ingestion.
{{
config(
materialized='incremental',
unique_key='global_id',
on_schema_change='fail'
)
}}
with enriched as (
select * from {{ ref('int_funda_listings_enriched') }}
select *
from {{ ref('int_funda_listings_enriched') }}
{% if is_incremental() %}
where ingested_at > (select max(ingested_at) from {{ this }}) -- noqa: RF02
{% endif %}
),
final as (