feat: expand testing

This commit is contained in:
Stijnvandenbroek
2026-03-04 22:18:30 +00:00
parent 0d2706a93e
commit 0b9b408714
22 changed files with 1266 additions and 54 deletions

View File

@@ -5,22 +5,153 @@ models:
description: >
Listings joined with the most recent asking price and last sold price from price history. One
row per listing.
config:
contract:
enforced: true
meta:
dagster:
group: funda
columns:
- name: global_id
description: Funda internal listing ID.
data_type: text
constraints:
- type: not_null
- type: unique
tests:
- unique
- not_null
- name: tiny_id
description: Public ID used in Funda URLs.
data_type: text
- name: title
description: Property address / title.
data_type: text
- name: city
description: City name.
data_type: text
tests:
- not_null
- name: postcode
description: Dutch postal code.
data_type: text
- name: province
description: Province name.
data_type: text
- name: neighbourhood
description: Neighbourhood name.
data_type: text
- name: municipality
description: Municipality name.
data_type: text
- name: price
description: Asking price from the listing details.
data_type: bigint
- name: price_formatted
description: Human-readable price string.
data_type: text
- name: status
description: Listing status.
data_type: text
tests:
- not_null
- name: offering_type
description: Buy or rent.
data_type: text
tests:
- not_null
- name: object_type
description: Property type.
data_type: text
- name: house_type
description: Sub-type of the property.
data_type: text
- name: construction_type
description: Construction method.
data_type: text
- name: construction_year
description: Year the property was built.
data_type: text
# Label range follows the values accepted by the stg_funda_listings energy_label test.
- name: energy_label
  description: Dutch energy performance label (A+++ to G).
  data_type: text
- name: living_area
description: Interior floor area in m².
data_type: integer
- name: plot_area
description: Total plot area in m².
data_type: integer
- name: bedrooms
description: Number of bedrooms.
data_type: integer
- name: rooms
description: Total number of rooms.
data_type: integer
- name: publication_date
description: Listing publication date.
data_type: text
- name: latitude
description: Latitude coordinate.
data_type: double precision
- name: longitude
description: Longitude coordinate.
data_type: double precision
- name: has_garden
description: Whether the property has a garden.
data_type: boolean
- name: has_balcony
description: Whether the property has a balcony.
data_type: boolean
- name: has_solar_panels
description: Whether solar panels are present.
data_type: boolean
- name: has_heat_pump
description: Whether a heat pump is installed.
data_type: boolean
- name: has_roof_terrace
description: Whether the property has a roof terrace.
data_type: boolean
- name: is_energy_efficient
description: Whether the listing is flagged as energy efficient.
data_type: boolean
- name: is_monument
description: Whether the property is a protected monument.
data_type: boolean
- name: url
description: Direct link to the Funda listing.
data_type: text
- name: photo_count
description: Number of photos on the listing.
data_type: integer
- name: views
description: Number of times the listing was viewed.
data_type: integer
- name: saves
description: Number of times the listing was saved as favourite.
data_type: integer
- name: latest_asking_price
description: Most recent asking price from Funda price history.
data_type: bigint
- name: latest_asking_date
description: Date of the most recent asking price event.
data_type: text
- name: sold_price
description: Price at which the listing was sold, if applicable.
data_type: bigint
- name: sold_date
description: Date the listing was sold, if applicable.
data_type: text
- name: is_sold
description: True when a sold price event exists for this listing.
data_type: boolean
constraints:
- type: not_null
tests:
- not_null
- name: ingested_at
description: Timestamp when the raw row was first written.
data_type: timestamptz
constraints:
- type: not_null
tests:
- not_null

View File

@@ -0,0 +1,64 @@
# dbt properties for the funda_city_stats model: column contract plus data tests.
version: 2

models:
  - name: funda_city_stats
    description: >
      Aggregated price statistics per city, province, offering type and object type. Only includes
      currently available (not sold) listings.
    # NOTE(review): the description implies one row per
    # (city, province, offering_type, object_type), but no test asserts that grain.
    # Consider dbt_utils.unique_combination_of_columns once confirmed against the model SQL.
    config:
      # Enforced contract: the column names and data types declared below must match
      # what the model actually returns.
      contract:
        enforced: true
    meta:
      dagster:
        group: funda
    columns:
      # --- Grouping columns ---
      - name: city
        description: City name.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null

      - name: province
        description: Province name.
        data_type: text

      - name: offering_type
        description: Buy or rent.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null

      - name: object_type
        description: Property type.
        data_type: text

      # --- Aggregates ---
      - name: listing_count
        description: Number of active listings in this group.
        data_type: bigint
        constraints:
          - type: not_null
        tests:
          - not_null
          # Column-level form: the column name is prepended to the expression,
          # so this asserts listing_count > 0.
          - dbt_utils.expression_is_true:
              expression: "> 0"

      - name: avg_price
        description: Average asking price.
        data_type: numeric

      - name: min_price
        description: Lowest asking price in this group.
        data_type: bigint

      - name: max_price
        description: Highest asking price in this group.
        data_type: bigint

      - name: median_price
        description: Median asking price.
        data_type: double precision

      - name: avg_price_per_sqm
        description: Average price per square metre.
        data_type: numeric

      - name: avg_living_area
        description: Average living area in m².
        data_type: numeric

      - name: avg_bedrooms
        description: Average number of bedrooms.
        data_type: numeric

View File

@@ -5,43 +5,171 @@ models:
description: >
Analysis-ready Funda listings table. One row per listing, enriched with price history, derived
metrics like price per sqm, and all cleaned fields from staging.
config:
contract:
enforced: true
meta:
dagster:
group: funda
columns:
- name: global_id
description: Funda internal listing ID.
data_type: text
constraints:
- type: not_null
- type: unique
tests:
- unique
- not_null
- name: current_price
description: Current asking or rental price in euros.
- name: price_per_sqm
description: Current price divided by living area in m².
- name: is_sold
description: True when a sold price event exists for this listing.
- name: sold_price
description: Final sold price, null if still available.
- name: sold_date
description: Date sold, null if still available.
- name: funda_city_stats
description: >
Aggregated price statistics per city, province, offering type and object type. Only includes
currently available (not sold) listings.
meta:
dagster:
group: funda
columns:
- name: tiny_id
description: Public ID used in Funda URLs.
data_type: text
- name: url
description: Direct link to the Funda listing.
data_type: text
- name: title
description: Property address / title.
data_type: text
- name: city
description: City name.
data_type: text
constraints:
- type: not_null
tests:
- not_null
- name: listing_count
description: Number of active listings in this group.
- name: avg_price
description: Average asking price.
- name: median_price
description: Median asking price.
- name: avg_price_per_sqm
description: Average price per square metre.
- name: postcode
description: Dutch postal code.
data_type: text
- name: province
description: Province name.
data_type: text
- name: neighbourhood
description: Neighbourhood name.
data_type: text
- name: municipality
description: Municipality name.
data_type: text
- name: latitude
description: Latitude coordinate.
data_type: double precision
- name: longitude
description: Longitude coordinate.
data_type: double precision
- name: object_type
description: Property type.
data_type: text
- name: house_type
description: Sub-type of the property.
data_type: text
- name: offering_type
description: Buy or rent.
data_type: text
constraints:
- type: not_null
tests:
- not_null
- name: construction_type
description: Construction method.
data_type: text
- name: construction_year
description: Year the property was built.
data_type: text
# Label range follows the values accepted by the stg_funda_listings energy_label test.
- name: energy_label
  description: Dutch energy performance label (A+++ to G).
  data_type: text
- name: living_area
description: Interior floor area in m².
data_type: integer
tests:
- dbt_utils.expression_is_true:
expression: "> 0"
where: "living_area is not null"
- name: plot_area
description: Total plot area in m².
data_type: integer
- name: bedrooms
description: Number of bedrooms.
data_type: integer
- name: rooms
description: Total number of rooms.
data_type: integer
- name: has_garden
description: Whether the property has a garden.
data_type: boolean
- name: has_balcony
description: Whether the property has a balcony.
data_type: boolean
- name: has_solar_panels
description: Whether solar panels are present.
data_type: boolean
- name: has_heat_pump
description: Whether a heat pump is installed.
data_type: boolean
- name: has_roof_terrace
description: Whether the property has a roof terrace.
data_type: boolean
- name: is_energy_efficient
description: Whether the listing is flagged as energy efficient.
data_type: boolean
- name: is_monument
description: Whether the property is a protected monument.
data_type: boolean
- name: current_price
description: Current asking or rental price in euros.
data_type: bigint
tests:
- dbt_utils.expression_is_true:
expression: "> 0"
where: "current_price is not null"
- name: latest_asking_price
description: Most recent asking price from price history.
data_type: bigint
- name: latest_asking_date
description: Date of the most recent asking price event.
data_type: text
- name: sold_price
description: Final sold price, null if still available.
data_type: bigint
- name: sold_date
description: Date sold, null if still available.
data_type: text
- name: is_sold
description: True when a sold price event exists for this listing.
data_type: boolean
constraints:
- type: not_null
tests:
- not_null
- name: photo_count
description: Number of photos on the listing.
data_type: integer
- name: views
description: Number of times the listing was viewed.
data_type: integer
- name: saves
description: Number of times the listing was saved as favourite.
data_type: integer
- name: status
description: Listing status.
data_type: text
constraints:
- type: not_null
tests:
- not_null
- name: publication_date
description: Listing publication date.
data_type: text
- name: ingested_at
description: Timestamp when the raw row was first written.
data_type: timestamptz
constraints:
- type: not_null
tests:
- not_null
- name: price_per_sqm
description: Current price divided by living area in m².
data_type: numeric
tests:
- dbt_utils.expression_is_true:
expression: "> 0"
where: "price_per_sqm is not null"

View File

@@ -11,17 +11,40 @@ sources:
meta:
dagster:
asset_key: ["funda_search_results"]
loaded_at_field: last_seen_at
freshness:
warn_after: { count: 12, period: hour }
error_after: { count: 25, period: hour }
columns:
- name: global_id
description: Funda internal listing ID.
tests:
- unique
- not_null
- name: title
description: Property address / title.
tests:
- not_null
- name: city
description: City name.
tests:
- not_null
- name: price
description: Asking or rental price in euros.
- name: is_active
description: False when the listing has not appeared in search results for 7+ days.
tests:
- not_null
- accepted_values:
values: [true, false]
- name: last_seen_at
description: Timestamp the listing was last returned by the Funda search API.
tests:
- not_null
# A mapping may carry only one `description` key; the later wording is the one kept.
- name: ingested_at
  description: Timestamp when the row was first written.
  tests:
    - not_null
- name: listing_details
description: >
@@ -29,17 +52,44 @@ sources:
meta:
dagster:
asset_key: ["funda_listing_details"]
loaded_at_field: last_fetched_at
freshness:
warn_after: { count: 25, period: hour }
error_after: { count: 49, period: hour }
columns:
- name: global_id
description: Funda internal listing ID.
tests:
- not_null
- relationships:
to: source('raw_funda', 'search_results')
field: global_id
- name: tiny_id
description: Public ID used in Funda URLs.
tests:
- not_null
- name: price
description: Asking or rental price in euros.
# Description lists every value the accepted_values test below allows.
- name: status
  description: Listing status (available, sold, withdrawn, or under_negotiation).
  tests:
    - not_null
    - accepted_values:
        values: ["available", "sold", "withdrawn", "under_negotiation"]
- name: is_stale
description: True when the parent search listing is no longer active.
tests:
- not_null
- accepted_values:
values: [true, false]
- name: last_fetched_at
description: Timestamp of the most recent detail fetch.
tests:
- not_null
# A mapping may carry only one `description` key; the later wording is the one kept.
- name: ingested_at
  description: Timestamp when the row was first written.
  tests:
    - not_null
- name: price_history
description: >
@@ -47,14 +97,37 @@ sources:
meta:
dagster:
asset_key: ["funda_price_history"]
loaded_at_field: ingested_at
freshness:
warn_after: { count: 25, period: hour }
error_after: { count: 49, period: hour }
columns:
- name: global_id
description: Funda internal listing ID.
tests:
- not_null
- relationships:
to: source('raw_funda', 'listing_details')
field: global_id
- name: price
description: Price at this point in time.
- name: date
description: Date of this price event.
tests:
- not_null
# Single `description` key, listing every value the accepted_values test below allows.
- name: source
  description: Price data source (funda, woz, or cadastre).
  tests:
    - not_null
    - accepted_values:
        values: ["funda", "woz", "cadastre"]
# Description lists every value the accepted_values test below allows.
- name: status
  description: Price event type (asking_price, sold, woz, or withdrawn).
  tests:
    - not_null
    - accepted_values:
        values: ["asking_price", "sold", "woz", "withdrawn"]
- name: ingested_at
description: Timestamp when the row was written.
tests:
- not_null

View File

@@ -3,16 +3,156 @@ version: 2
# dbt properties for stg_funda_listings: column contract plus data tests.
models:
  - name: stg_funda_listings
    description: Cleaned Funda listing details, one row per property.
    config:
      # Enforced contract: the column names and data types declared below must match
      # what the model actually returns.
      contract:
        enforced: true
    meta:
      dagster:
        group: funda
    columns:
      # --- Identifiers ---
      - name: global_id
        description: Funda internal listing ID.
        data_type: text
        constraints:
          - type: not_null
          - type: unique
        tests:
          - unique
          - not_null

      - name: tiny_id
        description: Public ID used in Funda URLs.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null

      # --- Address / location ---
      - name: title
        description: Property address / title.
        data_type: text

      - name: city
        description: City name.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null

      - name: postcode
        description: Dutch postal code.
        data_type: text

      - name: province
        description: Province name.
        data_type: text

      - name: neighbourhood
        description: Neighbourhood name.
        data_type: text

      - name: municipality
        description: Municipality name.
        data_type: text

      # --- Price and status ---
      - name: price
        description: Asking or rental price in euros.
        data_type: bigint
        tests:
          # Column-level form: asserts price >= 0; rows with a null price are excluded.
          - dbt_utils.expression_is_true:
              expression: ">= 0"
              where: "price is not null"

      - name: price_formatted
        description: Human-readable price string from Funda.
        data_type: text

      - name: status
        description: Listing status.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null
          - accepted_values:
              values: ["available", "sold", "withdrawn", "under_negotiation"]

      # NOTE(review): offering_type is tested not_null but, unlike status and city,
      # declares no not_null constraint - confirm this is intentional.
      - name: offering_type
        description: Buy or rent.
        data_type: text
        tests:
          - not_null

      # --- Property characteristics ---
      - name: object_type
        description: Property type (house, apartment, etc.).
        data_type: text

      - name: house_type
        description: Sub-type of the property.
        data_type: text

      - name: construction_type
        description: Construction method.
        data_type: text

      - name: construction_year
        description: Year the property was built.
        data_type: text

      - name: energy_label
        description: Dutch energy performance label (A+++ to G).
        data_type: text
        tests:
          # Null labels are allowed; only non-null values are checked against the list.
          - accepted_values:
              values: ["A+++", "A++", "A+", "A", "B", "C", "D", "E", "F", "G"]
              where: "energy_label is not null"

      - name: living_area
        description: Interior floor area in m².
        data_type: integer
        tests:
          # Column-level form: asserts living_area > 0 for rows where it is present.
          - dbt_utils.expression_is_true:
              expression: "> 0"
              where: "living_area is not null"

      - name: plot_area
        description: Total plot area in m².
        data_type: integer

      - name: bedrooms
        description: Number of bedrooms.
        data_type: integer

      - name: rooms
        description: Total number of rooms.
        data_type: integer

      - name: publication_date
        description: Listing publication date.
        data_type: text

      - name: latitude
        description: Latitude coordinate.
        data_type: double precision

      - name: longitude
        description: Longitude coordinate.
        data_type: double precision

      # --- Feature flags ---
      - name: has_garden
        description: Whether the property has a garden.
        data_type: boolean

      - name: has_balcony
        description: Whether the property has a balcony.
        data_type: boolean

      - name: has_solar_panels
        description: Whether solar panels are present.
        data_type: boolean

      - name: has_heat_pump
        description: Whether a heat pump is installed.
        data_type: boolean

      - name: has_roof_terrace
        description: Whether the property has a roof terrace.
        data_type: boolean

      - name: is_energy_efficient
        description: Whether the listing is flagged as energy efficient.
        data_type: boolean

      - name: is_monument
        description: Whether the property is a protected monument.
        data_type: boolean

      # --- Listing metadata ---
      - name: url
        description: Direct link to the Funda listing.
        data_type: text

      - name: photo_count
        description: Number of photos on the listing.
        data_type: integer

      - name: views
        description: Number of times the listing was viewed.
        data_type: integer

      - name: saves
        description: Number of times the listing was saved as favourite.
        data_type: integer

      - name: ingested_at
        description: Timestamp when the row was written to the raw table.
        data_type: timestamptz
        constraints:
          - type: not_null
        tests:
          - not_null

View File

@@ -3,13 +3,65 @@ version: 2
# dbt properties for stg_funda_price_history: column contract plus data tests.
models:
  - name: stg_funda_price_history
    description: Historical price events per listing (asking prices, WOZ assessments, sales).
    config:
      # Enforced contract: the column names and data types declared below must match
      # what the model actually returns.
      contract:
        enforced: true
    meta:
      dagster:
        group: funda
    columns:
      - name: global_id
        description: Funda internal listing ID.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null
          # Every price event must point at a listing present in stg_funda_listings.
          - relationships:
              to: ref('stg_funda_listings')
              field: global_id

      - name: price
        description: Price at this point in time.
        data_type: bigint
        tests:
          # Column-level form: asserts price >= 0; rows with a null price are excluded.
          - dbt_utils.expression_is_true:
              expression: ">= 0"
              where: "price is not null"

      - name: human_price
        description: Human-readable price string.
        data_type: text

      # NOTE(review): price_date and price_timestamp are declared as text rather than
      # date/timestamp types - confirm this matches what the model SQL returns.
      - name: price_date
        description: Date of this price event.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null

      - name: price_timestamp
        description: Timestamp of this price event.
        data_type: text

      - name: price_source
        description: Source of the price data.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null
          - accepted_values:
              values: ["funda", "woz", "cadastre"]

      - name: price_status
        description: Type of price event.
        data_type: text
        constraints:
          - type: not_null
        tests:
          - not_null
          - accepted_values:
              values: ["asking_price", "sold", "woz", "withdrawn"]

      - name: ingested_at
        description: Timestamp when the row was written to the raw table.
        data_type: timestamptz
        constraints:
          - type: not_null
        tests:
          - not_null