feat: revise funda data process
This commit is contained in:
@@ -1,29 +0,0 @@
|
||||
-- Mart: per-city price statistics for available listings.
|
||||
|
||||
with listings as (
|
||||
select * from {{ ref('funda_listings') }}
|
||||
where not is_sold -- keep only listings that are not flagged as sold
|
||||
),
|
||||
|
||||
city_stats as ( -- one row per (city, province, offering_type, object_type)
|
||||
select
|
||||
city,
|
||||
province,
|
||||
offering_type,
|
||||
object_type,
|
||||
count(*) as listing_count, -- rows in the group; all have a price (see filter below)
|
||||
round(avg(current_price), 0) as avg_price, -- mean price, rounded to zero decimals
|
||||
min(current_price) as min_price,
|
||||
max(current_price) as max_price,
|
||||
percentile_cont(0.5) within group ( -- interpolated 0.5 quantile, i.e. the median
|
||||
order by current_price
|
||||
) as median_price,
|
||||
round(avg(price_per_sqm), 0) as avg_price_per_sqm,
|
||||
round(avg(living_area), 0) as avg_living_area,
|
||||
round(avg(bedrooms), 1) as avg_bedrooms -- mean bedroom count, one decimal
|
||||
from listings
|
||||
where current_price is not null -- unpriced listings are excluded from every aggregate, including listing_count
|
||||
group by city, province, offering_type, object_type
|
||||
)
|
||||
|
||||
select * from city_stats
|
||||
@@ -1,64 +0,0 @@
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: funda_city_stats
|
||||
description: >
|
||||
Aggregated price statistics per city, province, offering type and object type. Only includes
|
||||
currently available (not sold) listings.
|
||||
config:
|
||||
contract:
|
||||
enforced: true
|
||||
meta:
|
||||
dagster:
|
||||
group: funda
|
||||
columns:
|
||||
- name: city
|
||||
description: City name.
|
||||
data_type: text
|
||||
constraints:
|
||||
- type: not_null
|
||||
tests:
|
||||
- not_null
|
||||
- name: province
|
||||
description: Province name.
|
||||
data_type: text
|
||||
- name: offering_type
|
||||
description: Buy or rent.
|
||||
data_type: text
|
||||
constraints:
|
||||
- type: not_null
|
||||
tests:
|
||||
- not_null
|
||||
- name: object_type
|
||||
description: Property type.
|
||||
data_type: text
|
||||
- name: listing_count
|
||||
description: Number of active listings in this group.
|
||||
data_type: bigint
|
||||
constraints:
|
||||
- type: not_null
|
||||
tests:
|
||||
- not_null
|
||||
- dbt_utils.expression_is_true:
|
||||
expression: "> 0"
|
||||
- name: avg_price
|
||||
description: Average asking price.
|
||||
data_type: numeric
|
||||
- name: min_price
|
||||
description: Lowest asking price in this group.
|
||||
data_type: bigint
|
||||
- name: max_price
|
||||
description: Highest asking price in this group.
|
||||
data_type: bigint
|
||||
- name: median_price
|
||||
description: Median asking price.
|
||||
data_type: double precision
|
||||
- name: avg_price_per_sqm
|
||||
description: Average price per square metre.
|
||||
data_type: numeric
|
||||
- name: avg_living_area
|
||||
description: Average living area in m².
|
||||
data_type: numeric
|
||||
- name: avg_bedrooms
|
||||
description: Average number of bedrooms.
|
||||
data_type: numeric
|
||||
@@ -1,8 +1,20 @@
|
||||
-- Mart: analysis-ready Funda listings table.
|
||||
-- Selects the most useful fields and adds derived metrics.
|
||||
-- Incrementally loads enriched listings, updating existing rows on re-ingestion.
|
||||
|
||||
{{
|
||||
config(
|
||||
materialized='incremental',
|
||||
unique_key='global_id',
|
||||
on_schema_change='fail'
|
||||
)
|
||||
}}
|
||||
|
||||
with enriched as (
|
||||
select * from {{ ref('int_funda_listings_enriched') }}
|
||||
select *
|
||||
from {{ ref('int_funda_listings_enriched') }}
|
||||
{% if is_incremental() %}
|
||||
-- Incremental runs only pick up rows ingested after the newest row already loaded.
|
||||
-- If the target table exists but is empty, max(ingested_at) is NULL and a bare
|
||||
-- `>` comparison would reject every row forever, so load everything in that case.
|
||||
where
|
||||
(select max(ingested_at) from {{ this }}) is null -- noqa: RF02
|
||||
or ingested_at > (select max(ingested_at) from {{ this }}) -- noqa: RF02
|
||||
{% endif %}
|
||||
),
|
||||
|
||||
final as (
|
||||
|
||||
@@ -10,7 +10,7 @@ sources:
|
||||
description: Funda search results (broad overview of matching listings).
|
||||
meta:
|
||||
dagster:
|
||||
asset_key: ["funda_search_results"]
|
||||
asset_key: ["raw_funda_search_results"]
|
||||
loaded_at_field: last_seen_at
|
||||
freshness:
|
||||
warn_after: { count: 12, period: hour }
|
||||
@@ -51,7 +51,7 @@ sources:
|
||||
Full listing details fetched per search result (50+ fields).
|
||||
meta:
|
||||
dagster:
|
||||
asset_key: ["funda_listing_details"]
|
||||
asset_key: ["raw_funda_listing_details"]
|
||||
loaded_at_field: last_fetched_at
|
||||
freshness:
|
||||
warn_after: { count: 25, period: hour }
|
||||
@@ -96,7 +96,7 @@ sources:
|
||||
Historical price data per listing (asking prices, WOZ, sales).
|
||||
meta:
|
||||
dagster:
|
||||
asset_key: ["funda_price_history"]
|
||||
asset_key: ["raw_funda_price_history"]
|
||||
loaded_at_field: ingested_at
|
||||
freshness:
|
||||
warn_after: { count: 25, period: hour }
|
||||
|
||||
Reference in New Issue
Block a user