diff --git a/dbt/dbt_project.yml b/dbt/dbt_project.yml
index 08a2da4..569ceab 100644
--- a/dbt/dbt_project.yml
+++ b/dbt/dbt_project.yml
@@ -18,5 +18,7 @@ models:
   data_platform:
     staging:
       +materialized: view
+    intermediate:
+      +materialized: view
     marts:
       +materialized: table
diff --git a/dbt/models/intermediate/int_funda_listings_enriched.sql b/dbt/models/intermediate/int_funda_listings_enriched.sql
new file mode 100644
index 0000000..1887eb8
--- /dev/null
+++ b/dbt/models/intermediate/int_funda_listings_enriched.sql
@@ -0,0 +1,52 @@
+-- Intermediate model: enrich each listing with its most recent asking price
+-- and the last recorded sold price from the price history.
+
+with listings as (
+    select * from {{ ref('stg_funda_listings') }}
+),
+
+price_history as (
+    select * from {{ ref('stg_funda_price_history') }}
+),
+
+-- Most recent Funda asking-price event per listing. Undated events sort last
+-- so they can never be picked over a dated one.
+latest_asking as (
+    select distinct on (global_id)
+        global_id,
+        price as latest_asking_price,
+        price_date as latest_asking_date
+    from price_history
+    where
+        price_source = 'Funda'
+        and price_status = 'asking_price'
+    order by global_id asc, price_date desc nulls last
+),
+
+-- Most recent sold event per listing, from any price source.
+latest_sold as (
+    select distinct on (global_id)
+        global_id,
+        price as sold_price,
+        price_date as sold_date
+    from price_history
+    where price_status = 'sold'
+    order by global_id asc, price_date desc nulls last
+),
+
+enriched as (
+    select
+        l.*,
+        la.latest_asking_price,
+        la.latest_asking_date,
+        ls.sold_price,
+        ls.sold_date,
+        -- Flag on the matched sold event, not on its price, so a sale
+        -- recorded without a price still counts as sold.
+        ls.global_id is not null as is_sold
+    from listings as l
+    left join latest_asking as la on l.global_id = la.global_id
+    left join latest_sold as ls on l.global_id = ls.global_id
+)
+
+select * from enriched
diff --git a/dbt/models/intermediate/int_funda_listings_enriched.yml b/dbt/models/intermediate/int_funda_listings_enriched.yml
new file mode 100644
index 0000000..4f5ef09
--- /dev/null
+++ b/dbt/models/intermediate/int_funda_listings_enriched.yml
@@ -0,0 +1,26 @@
+version: 2
+
+models:
+  - name: int_funda_listings_enriched
+    description: >
+      Listings joined with the most recent asking price and last sold price from price history. One
+      row per listing.
+    meta:
+      dagster:
+        group: funda
+    columns:
+      - name: global_id
+        description: Funda internal listing ID.
+        tests:
+          - unique
+          - not_null
+      - name: latest_asking_price
+        description: Most recent asking price from Funda price history.
+      - name: latest_asking_date
+        description: Date of the most recent asking price event.
+      - name: sold_price
+        description: Price at which the listing was sold, if applicable.
+      - name: sold_date
+        description: Date the listing was sold, if applicable.
+      - name: is_sold
+        description: True when a sold event exists for this listing, whether or not a price was recorded.
diff --git a/dbt/models/marts/funda_city_stats.sql b/dbt/models/marts/funda_city_stats.sql
new file mode 100644
index 0000000..cf8acf9
--- /dev/null
+++ b/dbt/models/marts/funda_city_stats.sql
@@ -0,0 +1,29 @@
+-- Mart: per-city price statistics for available listings.
+
+with listings as (
+    select * from {{ ref('funda_listings') }}
+    where not is_sold
+),
+
+city_stats as (
+    select
+        city,
+        province,
+        offering_type,
+        object_type,
+        count(*) as listing_count,
+        round(avg(current_price), 0) as avg_price,
+        min(current_price) as min_price,
+        max(current_price) as max_price,
+        percentile_cont(0.5) within group (
+            order by current_price
+        ) as median_price,
+        round(avg(price_per_sqm), 0) as avg_price_per_sqm,
+        round(avg(living_area), 0) as avg_living_area,
+        round(avg(bedrooms), 1) as avg_bedrooms
+    from listings
+    where current_price is not null
+    group by city, province, offering_type, object_type
+)
+
+select * from city_stats
diff --git a/dbt/models/marts/funda_listings.sql b/dbt/models/marts/funda_listings.sql
new file mode 100644
index 0000000..ef11b6d
--- /dev/null
+++ b/dbt/models/marts/funda_listings.sql
@@ -0,0 +1,71 @@
+-- Mart: analysis-ready Funda listings table.
+-- Selects the most useful fields and adds derived metrics.
+
+with enriched as (
+    select * from {{ ref('int_funda_listings_enriched') }}
+),
+
+final as (
+    select
+        -- identifiers
+        global_id,
+        tiny_id,
+        url,
+
+        -- location
+        title,
+        city,
+        postcode,
+        province,
+        neighbourhood,
+        municipality,
+        latitude,
+        longitude,
+
+        -- property characteristics
+        object_type,
+        house_type,
+        offering_type,
+        construction_type,
+        construction_year,
+        energy_label,
+        living_area,
+        plot_area,
+        bedrooms,
+        rooms,
+        has_garden,
+        has_balcony,
+        has_solar_panels,
+        has_heat_pump,
+        has_roof_terrace,
+        is_energy_efficient,
+        is_monument,
+
+        -- pricing
+        price as current_price,
+        latest_asking_price,
+        latest_asking_date,
+        sold_price,
+        sold_date,
+        is_sold,
+
+        -- media
+        photo_count,
+
+        -- engagement
+        views,
+        saves,
+        status,
+
+        -- meta
+        publication_date,
+        ingested_at,
+
+        -- derived: price per sqm of living area; null when living_area is missing or not positive
+        case
+            when living_area > 0 then round(price::numeric / living_area, 0)
+        end as price_per_sqm
+    from enriched
+)
+
+select * from final
diff --git a/dbt/models/marts/funda_listings.yml b/dbt/models/marts/funda_listings.yml
new file mode 100644
index 0000000..7f85567
--- /dev/null
+++ b/dbt/models/marts/funda_listings.yml
@@ -0,0 +1,47 @@
+version: 2
+
+models:
+  - name: funda_listings
+    description: >
+      Analysis-ready Funda listings table. One row per listing, enriched with price history, derived
+      metrics like price per sqm, and the most useful cleaned fields from staging.
+    meta:
+      dagster:
+        group: funda
+    columns:
+      - name: global_id
+        description: Funda internal listing ID.
+        tests:
+          - unique
+          - not_null
+      - name: current_price
+        description: Current asking or rental price in euros.
+      - name: price_per_sqm
+        description: Current price divided by living area in m².
+      - name: is_sold
+        description: True when a sold event exists for this listing, whether or not a price was recorded.
+      - name: sold_price
+        description: Final sold price, null if still available or sold without a recorded price.
+      - name: sold_date
+        description: Date sold, null if still available.
+
+  - name: funda_city_stats
+    description: >
+      Aggregated price statistics per city, province, offering type and object type. Only includes
+      currently available (not sold) listings.
+    meta:
+      dagster:
+        group: funda
+    columns:
+      - name: city
+        description: City name.
+        tests:
+          - not_null
+      - name: listing_count
+        description: Number of active listings in this group.
+      - name: avg_price
+        description: Average asking price.
+      - name: median_price
+        description: Median asking price.
+      - name: avg_price_per_sqm
+        description: Average price per square metre.
diff --git a/dbt/models/staging/schema.yml b/dbt/models/staging/schema.yml
deleted file mode 100644
index a3c5875..0000000
--- a/dbt/models/staging/schema.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-version: 2
-
-models:
-  - name: stg_funda_listings
-    description: >
-      Cleaned Funda listing details – one row per property.
-    meta:
-      dagster:
-        group: funda
-    columns:
-      - name: global_id
-        description: Funda internal listing ID.
-        tests:
-          - unique
-          - not_null
-      - name: city
-        description: City name.
-      - name: price
-        description: Asking or rental price in euros.
-
-  - name: stg_funda_price_history
-    description: >
-      Historical price events per listing (asking prices, WOZ assessments, sales).
-    meta:
-      dagster:
-        group: funda
-    columns:
-      - name: global_id
-        description: Funda internal listing ID.
-        tests:
-          - not_null
-      - name: price
-        description: Price at this point in time.
diff --git a/dbt/models/staging/stg_funda_listings.yml b/dbt/models/staging/stg_funda_listings.yml
new file mode 100644
index 0000000..a3af55e
--- /dev/null
+++ b/dbt/models/staging/stg_funda_listings.yml
@@ -0,0 +1,18 @@
+version: 2
+
+models:
+  - name: stg_funda_listings
+    description: Cleaned Funda listing details – one row per property.
+    meta:
+      dagster:
+        group: funda
+    columns:
+      - name: global_id
+        description: Funda internal listing ID.
+        tests:
+          - unique
+          - not_null
+      - name: city
+        description: City name.
+      - name: price
+        description: Asking or rental price in euros.
diff --git a/dbt/models/staging/stg_funda_price_history.yml b/dbt/models/staging/stg_funda_price_history.yml
new file mode 100644
index 0000000..f87cfd9
--- /dev/null
+++ b/dbt/models/staging/stg_funda_price_history.yml
@@ -0,0 +1,15 @@
+version: 2
+
+models:
+  - name: stg_funda_price_history
+    description: Historical price events per listing (asking prices, WOZ assessments, sales).
+    meta:
+      dagster:
+        group: funda
+    columns:
+      - name: global_id
+        description: Funda internal listing ID.
+        tests:
+          - not_null
+      - name: price
+        description: Price at this point in time.