feat: add automation
This commit is contained in:
@@ -7,6 +7,7 @@ from data_platform.assets.ingestion.funda import (
|
|||||||
funda_price_history,
|
funda_price_history,
|
||||||
funda_search_results,
|
funda_search_results,
|
||||||
)
|
)
|
||||||
|
from data_platform.helpers import apply_automation
|
||||||
from data_platform.jobs import (
|
from data_platform.jobs import (
|
||||||
elementary_refresh_job,
|
elementary_refresh_job,
|
||||||
funda_ingestion_job,
|
funda_ingestion_job,
|
||||||
@@ -20,12 +21,14 @@ from data_platform.schedules import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
defs = Definitions(
|
defs = Definitions(
|
||||||
assets=[
|
assets=apply_automation(
|
||||||
dbt_project_assets,
|
[
|
||||||
funda_search_results,
|
dbt_project_assets,
|
||||||
funda_listing_details,
|
funda_search_results,
|
||||||
funda_price_history,
|
funda_listing_details,
|
||||||
],
|
funda_price_history,
|
||||||
|
]
|
||||||
|
),
|
||||||
jobs=[funda_ingestion_job, funda_raw_quality_job, elementary_refresh_job],
|
jobs=[funda_ingestion_job, funda_raw_quality_job, elementary_refresh_job],
|
||||||
schedules=[
|
schedules=[
|
||||||
funda_ingestion_schedule,
|
funda_ingestion_schedule,
|
||||||
|
|||||||
@@ -1,66 +1,21 @@
|
|||||||
"""Shared helper utilities."""
|
"""Shared helper utilities."""
|
||||||
|
|
||||||
import json
|
from data_platform.helpers.automation import apply_automation
|
||||||
from pathlib import Path
|
from data_platform.helpers.formatting import (
|
||||||
|
format_area,
|
||||||
|
format_euro,
|
||||||
|
md_preview_table,
|
||||||
|
safe,
|
||||||
|
safe_int,
|
||||||
|
)
|
||||||
|
from data_platform.helpers.sql import render_sql
|
||||||
|
|
||||||
from jinja2 import Environment, FileSystemLoader
|
__all__ = [
|
||||||
|
"apply_automation",
|
||||||
|
"format_area",
|
||||||
def render_sql(sql_dir: Path, path: str, **kwargs: object) -> str:
|
"format_euro",
|
||||||
"""Load and render a Jinja2 SQL template relative to sql_dir."""
|
"md_preview_table",
|
||||||
env = Environment(loader=FileSystemLoader(str(sql_dir)), autoescape=False)
|
"render_sql",
|
||||||
return env.get_template(path).render(**kwargs)
|
"safe",
|
||||||
|
"safe_int",
|
||||||
|
]
|
||||||
def safe(val):
|
|
||||||
"""Convert non-serialisable values for JSONB storage."""
|
|
||||||
if isinstance(val, list | dict | tuple):
|
|
||||||
return json.dumps(val, default=str)
|
|
||||||
return val
|
|
||||||
|
|
||||||
|
|
||||||
def safe_int(val):
|
|
||||||
"""Try to cast to int, return None on failure."""
|
|
||||||
if val is None:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
return int(val)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
try:
|
|
||||||
return int(float(val))
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def md_preview_table(
|
|
||||||
rows: list[dict],
|
|
||||||
columns: list[tuple[str, str]],
|
|
||||||
formatters: dict[str, callable] | None = None,
|
|
||||||
) -> str:
|
|
||||||
"""Build a markdown table from a list of row dicts."""
|
|
||||||
formatters = formatters or {}
|
|
||||||
headers = [label for _, label in columns]
|
|
||||||
lines = [
|
|
||||||
"| " + " | ".join(headers) + " |",
|
|
||||||
"| " + " | ".join("---" for _ in headers) + " |",
|
|
||||||
]
|
|
||||||
for r in rows:
|
|
||||||
cells = []
|
|
||||||
for key, _ in columns:
|
|
||||||
val = r.get(key)
|
|
||||||
if key in formatters:
|
|
||||||
cells.append(formatters[key](val))
|
|
||||||
else:
|
|
||||||
cells.append(str(val) if val is not None else "–")
|
|
||||||
lines.append("| " + " | ".join(cells) + " |")
|
|
||||||
return "\n".join(lines)
|
|
||||||
|
|
||||||
|
|
||||||
def format_euro(val) -> str:
|
|
||||||
"""Format an integer as €-prefixed, or '–'."""
|
|
||||||
return f"€{val:,}" if val else "–"
|
|
||||||
|
|
||||||
|
|
||||||
def format_area(val) -> str:
|
|
||||||
"""Format an integer as m², or '–'."""
|
|
||||||
return f"{val} m²" if val else "–"
|
|
||||||
|
|||||||
43
data_platform/helpers/automation.py
Normal file
43
data_platform/helpers/automation.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""Global automation condition for all assets.
|
||||||
|
|
||||||
|
Applies eager auto-materialization to every asset except those tagged "manual".
|
||||||
|
Also prevents duplicate runs by skipping assets that have any dependencies
|
||||||
|
currently in progress.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
from dagster import AssetsDefinition, AutomationCondition
|
||||||
|
|
||||||
|
# Eager: materialize when any dependency updates, but skip when any upstream
|
||||||
|
# dependency is still in progress to avoid wasted / duplicate runs.
|
||||||
|
AUTOMATION_CONDITION = (
|
||||||
|
AutomationCondition.eager() & ~AutomationCondition.any_deps_in_progress()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_automation(
|
||||||
|
assets: Iterable[AssetsDefinition],
|
||||||
|
) -> list[AssetsDefinition]:
|
||||||
|
"""Return a new list with the global automation condition applied.
|
||||||
|
|
||||||
|
Assets (or individual specs inside multi-asset groups) tagged ``"manual"``
|
||||||
|
are left untouched and will only run when triggered explicitly.
|
||||||
|
"""
|
||||||
|
result: list[AssetsDefinition] = []
|
||||||
|
for asset_def in assets:
|
||||||
|
if _is_manual(asset_def):
|
||||||
|
result.append(asset_def)
|
||||||
|
else:
|
||||||
|
result.append(
|
||||||
|
asset_def.with_attributes(automation_condition=AUTOMATION_CONDITION)
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _is_manual(asset_def: AssetsDefinition) -> bool:
|
||||||
|
"""Check whether *any* spec in the asset definition carries a ``manual`` tag."""
|
||||||
|
for spec in asset_def.specs:
|
||||||
|
if spec.tags.get("manual"):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
57
data_platform/helpers/formatting.py
Normal file
57
data_platform/helpers/formatting.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
"""Data formatting and serialisation helpers."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def safe(val):
|
||||||
|
"""Convert non-serialisable values for JSONB storage."""
|
||||||
|
if isinstance(val, list | dict | tuple):
|
||||||
|
return json.dumps(val, default=str)
|
||||||
|
return val
|
||||||
|
|
||||||
|
|
||||||
|
def safe_int(val):
|
||||||
|
"""Try to cast to int, return None on failure."""
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return int(val)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
try:
|
||||||
|
return int(float(val))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def md_preview_table(
|
||||||
|
rows: list[dict],
|
||||||
|
columns: list[tuple[str, str]],
|
||||||
|
formatters: dict[str, callable] | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Build a markdown table from a list of row dicts."""
|
||||||
|
formatters = formatters or {}
|
||||||
|
headers = [label for _, label in columns]
|
||||||
|
lines = [
|
||||||
|
"| " + " | ".join(headers) + " |",
|
||||||
|
"| " + " | ".join("---" for _ in headers) + " |",
|
||||||
|
]
|
||||||
|
for r in rows:
|
||||||
|
cells = []
|
||||||
|
for key, _ in columns:
|
||||||
|
val = r.get(key)
|
||||||
|
if key in formatters:
|
||||||
|
cells.append(formatters[key](val))
|
||||||
|
else:
|
||||||
|
cells.append(str(val) if val is not None else "–")
|
||||||
|
lines.append("| " + " | ".join(cells) + " |")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def format_euro(val) -> str:
|
||||||
|
"""Format an integer as €-prefixed, or '–'."""
|
||||||
|
return f"€{val:,}" if val else "–"
|
||||||
|
|
||||||
|
|
||||||
|
def format_area(val) -> str:
|
||||||
|
"""Format an integer as m², or '–'."""
|
||||||
|
return f"{val} m²" if val else "–"
|
||||||
11
data_platform/helpers/sql.py
Normal file
11
data_platform/helpers/sql.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
"""SQL rendering utilities."""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
|
|
||||||
|
def render_sql(sql_dir: Path, path: str, **kwargs: object) -> str:
|
||||||
|
"""Load and render a Jinja2 SQL template relative to sql_dir."""
|
||||||
|
env = Environment(loader=FileSystemLoader(str(sql_dir)), autoescape=False)
|
||||||
|
return env.get_template(path).render(**kwargs)
|
||||||
Reference in New Issue
Block a user