feat: expand dbt models

This commit is contained in:
Stijnvandenbroek
2026-03-04 18:18:36 +00:00
parent 3e51d630e6
commit 65134183ca
9 changed files with 253 additions and 33 deletions

View File

@@ -18,5 +18,7 @@ models:
data_platform:
staging:
+materialized: view
intermediate:
+materialized: view
marts:
+materialized: table

View File

@@ -0,0 +1,47 @@
-- Intermediate model: enrich each listing with its most recent asking price
-- and the last recorded sold price from the price history.
-- Both lookups below return at most one row per global_id, so the left joins
-- never multiply listing rows.
with listings as (
    select * from {{ ref('stg_funda_listings') }}
),

price_history as (
    select * from {{ ref('stg_funda_price_history') }}
),

-- Most recent Funda asking-price event per listing.
-- `distinct on` keeps the first row per global_id in ORDER BY order.
-- PostgreSQL sorts NULLs first for DESC by default, so `nulls last` is needed
-- to stop an event without a price_date from beating every dated event.
-- The trailing `price` key is only a tie-breaker that makes the pick
-- deterministic when several events share the same price_date.
latest_asking as (
    select distinct on (global_id)
        global_id,
        price as latest_asking_price,
        price_date as latest_asking_date
    from price_history
    where
        price_source = 'Funda'
        and price_status = 'asking_price'
    order by
        global_id asc,
        price_date desc nulls last,
        price desc nulls last
),

-- Most recent sold event per listing, from any price source.
-- Same ordering rules as latest_asking.
latest_sold as (
    select distinct on (global_id)
        global_id,
        price as sold_price,
        price_date as sold_date
    from price_history
    where price_status = 'sold'
    order by
        global_id asc,
        price_date desc nulls last,
        price desc nulls last
),

enriched as (
    select
        l.*,
        la.latest_asking_price,
        la.latest_asking_date,
        ls.sold_price,
        ls.sold_date,
        -- false both when no sold event exists and when the chosen sold
        -- event carries no price
        ls.sold_price is not null as is_sold
    from listings as l
    left join latest_asking as la on l.global_id = la.global_id
    left join latest_sold as ls on l.global_id = ls.global_id
)

select * from enriched

View File

@@ -0,0 +1,26 @@
# Properties for the int_funda_listings_enriched intermediate model.
version: 2
models:
- name: int_funda_listings_enriched
description: >
Listings joined with the most recent asking price and last sold price from price history. One
row per listing.
meta:
dagster:
group: funda
# Column documentation; only global_id carries tests.
columns:
- name: global_id
description: Funda internal listing ID.
# unique + not_null together check the one-row-per-listing grain stated above.
tests:
- unique
- not_null
- name: latest_asking_price
description: Most recent asking price from Funda price history.
- name: latest_asking_date
description: Date of the most recent asking price event.
- name: sold_price
description: Price at which the listing was sold, if applicable.
- name: sold_date
description: Date the listing was sold, if applicable.
- name: is_sold
description: True when a sold price event exists for this listing.

View File

@@ -0,0 +1,29 @@
-- Mart: per-city price statistics for available listings.
-- One output row per (city, province, offering_type, object_type).
with available_priced as (
    -- Listings not flagged as sold that also have a known current price.
    select *
    from {{ ref('funda_listings') }}
    where
        not is_sold
        and current_price is not null
)

select
    city,
    province,
    offering_type,
    object_type,
    count(*) as listing_count,
    round(avg(current_price), 0) as avg_price,
    min(current_price) as min_price,
    max(current_price) as max_price,
    -- continuous (interpolated) median of the current price
    percentile_cont(0.5) within group (order by current_price) as median_price,
    round(avg(price_per_sqm), 0) as avg_price_per_sqm,
    round(avg(living_area), 0) as avg_living_area,
    round(avg(bedrooms), 1) as avg_bedrooms
from available_priced
group by
    city,
    province,
    offering_type,
    object_type

View File

@@ -0,0 +1,69 @@
-- Mart: analysis-ready Funda listings table.
-- Projects a curated set of columns from the enriched intermediate model and
-- adds a price-per-square-metre metric as the last column.
with source_listings as (
    select * from {{ ref('int_funda_listings_enriched') }}
)

select
    -- identifiers
    global_id,
    tiny_id,
    url,

    -- location
    title,
    city,
    postcode,
    province,
    neighbourhood,
    municipality,
    latitude,
    longitude,

    -- property characteristics
    object_type,
    house_type,
    offering_type,
    construction_type,
    construction_year,
    energy_label,
    living_area,
    plot_area,
    bedrooms,
    rooms,
    has_garden,
    has_balcony,
    has_solar_panels,
    has_heat_pump,
    has_roof_terrace,
    is_energy_efficient,
    is_monument,

    -- pricing
    price as current_price,
    latest_asking_price,
    latest_asking_date,
    sold_price,
    sold_date,
    is_sold,

    -- media
    photo_count,

    -- engagement
    views,
    saves,
    status,

    -- meta
    publication_date,
    ingested_at,

    -- derived: price divided by living area, rounded to 0 decimals;
    -- null when living_area is null, zero or negative
    case
        when living_area > 0
            then round(cast(price as numeric) / living_area, 0)
    end as price_per_sqm
from source_listings

View File

@@ -0,0 +1,47 @@
# Properties for the Funda mart models: funda_listings and funda_city_stats.
version: 2
models:
  - name: funda_listings
    description: >
      Analysis-ready Funda listings table. One row per listing, enriched with price history and
      derived metrics like price per sqm, exposing a curated selection of the cleaned listing
      fields.
    meta:
      dagster:
        group: funda
    columns:
      - name: global_id
        description: Funda internal listing ID.
        tests:
          - unique
          - not_null
      - name: current_price
        description: Current asking or rental price in euros.
      - name: price_per_sqm
        description: >
          Current price divided by living area in m², rounded to a whole number. Null when the
          living area is missing or not positive.
      - name: is_sold
        description: True when a sold price event exists for this listing.
      - name: sold_price
        description: Final sold price, null if still available.
      - name: sold_date
        description: Date sold, null if still available.
  - name: funda_city_stats
    description: >
      Aggregated price statistics per city, province, offering type and object type. Only includes
      currently available (not sold) listings that have a current price.
    meta:
      dagster:
        group: funda
    columns:
      - name: city
        description: City name.
        tests:
          - not_null
      - name: listing_count
        description: Number of available listings with a known price in this group.
      - name: avg_price
        description: Average asking price.
      - name: median_price
        description: Median asking price.
      - name: avg_price_per_sqm
        description: Average price per square metre.

View File

@@ -1,33 +0,0 @@
# Combined properties file describing both Funda staging models.
version: 2
models:
- name: stg_funda_listings
description: >
Cleaned Funda listing details one row per property.
meta:
dagster:
group: funda
columns:
- name: global_id
description: Funda internal listing ID.
# unique + not_null check the one-row-per-property grain stated above.
tests:
- unique
- not_null
- name: city
description: City name.
- name: price
description: Asking or rental price in euros.
# Price history holds multiple events per listing, so global_id is only tested for not_null.
- name: stg_funda_price_history
description: >
Historical price events per listing (asking prices, WOZ assessments, sales).
meta:
dagster:
group: funda
columns:
- name: global_id
description: Funda internal listing ID.
tests:
- not_null
- name: price
description: Price at this point in time.

View File

@@ -0,0 +1,18 @@
# Properties for the stg_funda_listings staging model.
version: 2
models:
- name: stg_funda_listings
description: Cleaned Funda listing details one row per property.
meta:
dagster:
group: funda
columns:
- name: global_id
description: Funda internal listing ID.
# unique + not_null check the one-row-per-property grain stated above.
tests:
- unique
- not_null
- name: city
description: City name.
- name: price
description: Asking or rental price in euros.

View File

@@ -0,0 +1,15 @@
# Properties for the stg_funda_price_history staging model.
version: 2
models:
- name: stg_funda_price_history
description: Historical price events per listing (asking prices, WOZ assessments, sales).
meta:
dagster:
group: funda
columns:
- name: global_id
description: Funda internal listing ID.
# Only not_null is tested: this model holds multiple price events per listing.
tests:
- not_null
- name: price
description: Price at this point in time.