feat: revise Funda data process

This commit is contained in:
Stijnvandenbroek
2026-03-05 19:12:33 +00:00
parent ef0cddaa22
commit b959049fe8
11 changed files with 68 additions and 135 deletions

View File

@@ -1,9 +1,13 @@
"""Funda ingestion assets."""
from data_platform.assets.ingestion.funda.funda import (
funda_listing_details,
funda_price_history,
funda_search_results,
raw_funda_listing_details,
raw_funda_price_history,
raw_funda_search_results,
)
__all__ = ["funda_listing_details", "funda_price_history", "funda_search_results"]
__all__ = [
"raw_funda_listing_details",
"raw_funda_price_history",
"raw_funda_search_results",
]

View File

@@ -60,7 +60,7 @@ class FundaPriceHistoryConfig(Config):
kinds={"python", "postgres"},
description="Search Funda listings and store results in Postgres.",
)
def funda_search_results(
def raw_funda_search_results(
context: AssetExecutionContext,
config: FundaSearchConfig,
funda: FundaResource,
@@ -189,10 +189,10 @@ def funda_search_results(
@asset(
group_name="funda",
kinds={"python", "postgres"},
deps=[funda_search_results],
deps=[raw_funda_search_results],
description="Fetch full listing details for each search result and store in Postgres.",
)
def funda_listing_details(
def raw_funda_listing_details(
context: AssetExecutionContext,
config: FundaDetailsConfig,
funda: FundaResource,
@@ -332,10 +332,10 @@ def funda_listing_details(
@asset(
group_name="funda",
kinds={"python", "postgres"},
deps=[funda_listing_details],
deps=[raw_funda_listing_details],
description="Fetch price history for each detailed listing and store in Postgres.",
)
def funda_price_history(
def raw_funda_price_history(
context: AssetExecutionContext,
config: FundaPriceHistoryConfig,
funda: FundaResource,

View File

@@ -3,9 +3,9 @@ from dagster_dbt import DbtCliResource
from data_platform.assets.dbt import DBT_PROJECT_DIR, dbt_project_assets
from data_platform.assets.ingestion.funda import (
funda_listing_details,
funda_price_history,
funda_search_results,
raw_funda_listing_details,
raw_funda_price_history,
raw_funda_search_results,
)
from data_platform.helpers import apply_automation
from data_platform.jobs import (
@@ -24,9 +24,9 @@ defs = Definitions(
assets=apply_automation(
[
dbt_project_assets,
funda_search_results,
funda_listing_details,
funda_price_history,
raw_funda_search_results,
raw_funda_listing_details,
raw_funda_price_history,
]
),
jobs=[funda_ingestion_job, funda_raw_quality_job, elementary_refresh_job],

View File

@@ -10,9 +10,9 @@ from data_platform.ops.check_source_freshness import (
funda_ingestion_job = define_asset_job(
name="funda_ingestion",
selection=AssetSelection.assets(
"funda_search_results",
"funda_listing_details",
"funda_price_history",
"raw_funda_search_results",
"raw_funda_listing_details",
"raw_funda_price_history",
),
description="Full Funda ingestion pipeline.",
)

View File

@@ -15,9 +15,9 @@ funda_ingestion_schedule = ScheduleDefinition(
cron_schedule="0 */4 * * *",
run_config=RunConfig(
ops={
"funda_search_results": FundaSearchConfig(),
"funda_listing_details": FundaDetailsConfig(),
"funda_price_history": FundaPriceHistoryConfig(),
"raw_funda_search_results": FundaSearchConfig(),
"raw_funda_listing_details": FundaDetailsConfig(),
"raw_funda_price_history": FundaPriceHistoryConfig(),
}
),
default_status=DefaultScheduleStatus.RUNNING,