feat: finetune elementary implementation

This commit is contained in:
Stijnvandenbroek
2026-03-04 22:26:35 +00:00
parent 0b9b408714
commit 5166a183b4
5 changed files with 57 additions and 7 deletions

View File

@@ -13,7 +13,7 @@ dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
dbt_project.prepare_if_dev()
@dbt_assets(manifest=dbt_project.manifest_path)
@dbt_assets(manifest=dbt_project.manifest_path, exclude="package:elementary")
def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
"""Expose every dbt model as a Dagster asset."""
yield from dbt.cli(["build"], context=context).stream()

View File

@@ -2,9 +2,14 @@
from dagster import job
from data_platform.ops.elementary import elementary_generate_report
from data_platform.ops.elementary import (
elementary_generate_report,
elementary_run_models,
)
@job(description="Regenerate the Elementary data observability report.")
@job(
description="Ensure Elementary models exist, then regenerate the observability report."
)
def elementary_refresh_job():
elementary_generate_report()
elementary_generate_report(after=elementary_run_models())

View File

@@ -2,10 +2,14 @@ from data_platform.ops.check_source_freshness import (
SourceFreshnessConfig,
check_source_freshness,
)
from data_platform.ops.elementary import elementary_generate_report
from data_platform.ops.elementary import (
elementary_generate_report,
elementary_run_models,
)
__all__ = [
"check_source_freshness",
"SourceFreshnessConfig",
"elementary_run_models",
"elementary_generate_report",
]

View File

@@ -1,14 +1,52 @@
"""Elementary report generation op."""
"""Elementary ops."""
import os
import subprocess
from pathlib import Path
from dagster import OpExecutionContext, op
from dagster import In, Nothing, OpExecutionContext, op
from dagster_dbt import DbtCliResource
from sqlalchemy import create_engine, text
_DBT_DIR = Path(__file__).parents[2] / "dbt"
def _elementary_schema_exists() -> bool:
url = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
user=os.environ["POSTGRES_USER"],
password=os.environ["POSTGRES_PASSWORD"],
host=os.environ.get("POSTGRES_HOST", "localhost"),
port=os.environ.get("POSTGRES_PORT", "5432"),
dbname=os.environ["POSTGRES_DB"],
)
engine = create_engine(url)
with engine.connect() as conn:
return bool(
conn.execute(
text(
"SELECT 1 FROM information_schema.schemata WHERE schema_name = 'elementary'"
)
).scalar()
)
@op
def elementary_run_models(context: OpExecutionContext, dbt: DbtCliResource) -> None:
"""Run Elementary dbt models only if the elementary schema does not exist yet."""
if _elementary_schema_exists():
context.log.info("Elementary schema already exists, skipping model run.")
return
context.log.info("Elementary schema not found, running dbt models.")
list(
dbt.cli(
["run", "--select", "elementary"],
context=context,
).stream()
)
@op(ins={"after": In(Nothing)})
def elementary_generate_report(context: OpExecutionContext) -> None:
"""Run edr report to regenerate the Elementary HTML report."""
cmd = [