feat: finetune elementary implementation

This commit is contained in:
Stijnvandenbroek
2026-03-04 22:26:35 +00:00
parent 0b9b408714
commit 5166a183b4
5 changed files with 57 additions and 7 deletions

View File

@@ -13,7 +13,7 @@ dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
dbt_project.prepare_if_dev() dbt_project.prepare_if_dev()
@dbt_assets(manifest=dbt_project.manifest_path) @dbt_assets(manifest=dbt_project.manifest_path, exclude="package:elementary")
def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource): def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
"""Expose every dbt model as a Dagster asset.""" """Expose every dbt model as a Dagster asset."""
yield from dbt.cli(["build"], context=context).stream() yield from dbt.cli(["build"], context=context).stream()

View File

@@ -2,9 +2,14 @@
from dagster import job from dagster import job
from data_platform.ops.elementary import elementary_generate_report from data_platform.ops.elementary import (
elementary_generate_report,
elementary_run_models,
)
@job(description="Regenerate the Elementary data observability report.") @job(
description="Ensure Elementary models exist, then regenerate the observability report."
)
def elementary_refresh_job(): def elementary_refresh_job():
elementary_generate_report() elementary_generate_report(after=elementary_run_models())

View File

@@ -2,10 +2,14 @@ from data_platform.ops.check_source_freshness import (
SourceFreshnessConfig, SourceFreshnessConfig,
check_source_freshness, check_source_freshness,
) )
from data_platform.ops.elementary import elementary_generate_report from data_platform.ops.elementary import (
elementary_generate_report,
elementary_run_models,
)
__all__ = [ __all__ = [
"check_source_freshness", "check_source_freshness",
"SourceFreshnessConfig", "SourceFreshnessConfig",
"elementary_run_models",
"elementary_generate_report", "elementary_generate_report",
] ]

View File

@@ -1,14 +1,52 @@
"""Elementary report generation op.""" """Elementary ops."""
import os
import subprocess import subprocess
from pathlib import Path from pathlib import Path
from dagster import OpExecutionContext, op from dagster import In, Nothing, OpExecutionContext, op
from dagster_dbt import DbtCliResource
from sqlalchemy import create_engine, text
_DBT_DIR = Path(__file__).parents[2] / "dbt" _DBT_DIR = Path(__file__).parents[2] / "dbt"
def _elementary_schema_exists() -> bool:
url = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
user=os.environ["POSTGRES_USER"],
password=os.environ["POSTGRES_PASSWORD"],
host=os.environ.get("POSTGRES_HOST", "localhost"),
port=os.environ.get("POSTGRES_PORT", "5432"),
dbname=os.environ["POSTGRES_DB"],
)
engine = create_engine(url)
with engine.connect() as conn:
return bool(
conn.execute(
text(
"SELECT 1 FROM information_schema.schemata WHERE schema_name = 'elementary'"
)
).scalar()
)
@op @op
def elementary_run_models(context: OpExecutionContext, dbt: DbtCliResource) -> None:
"""Run Elementary dbt models only if the elementary schema does not exist yet."""
if _elementary_schema_exists():
context.log.info("Elementary schema already exists, skipping model run.")
return
context.log.info("Elementary schema not found, running dbt models.")
list(
dbt.cli(
["run", "--select", "elementary"],
context=context,
).stream()
)
@op(ins={"after": In(Nothing)})
def elementary_generate_report(context: OpExecutionContext) -> None: def elementary_generate_report(context: OpExecutionContext) -> None:
"""Run edr report to regenerate the Elementary HTML report.""" """Run edr report to regenerate the Elementary HTML report."""
cmd = [ cmd = [

View File

@@ -1,6 +1,9 @@
#!/bin/sh #!/bin/sh
set -e set -e
echo "Installing dbt packages..."
dbt deps --profiles-dir /app/dbt --project-dir /app/dbt
echo "Generating dbt manifest..." echo "Generating dbt manifest..."
dbt parse --profiles-dir /app/dbt --project-dir /app/dbt dbt parse --profiles-dir /app/dbt --project-dir /app/dbt