feat: finetune elementary implementation
This commit is contained in:
@@ -13,7 +13,7 @@ dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
|
|||||||
dbt_project.prepare_if_dev()
|
dbt_project.prepare_if_dev()
|
||||||
|
|
||||||
|
|
||||||
@dbt_assets(manifest=dbt_project.manifest_path)
|
@dbt_assets(manifest=dbt_project.manifest_path, exclude="package:elementary")
|
||||||
def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
def dbt_project_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
||||||
"""Expose every dbt model as a Dagster asset."""
|
"""Expose every dbt model as a Dagster asset."""
|
||||||
yield from dbt.cli(["build"], context=context).stream()
|
yield from dbt.cli(["build"], context=context).stream()
|
||||||
|
|||||||
@@ -2,9 +2,14 @@
|
|||||||
|
|
||||||
from dagster import job
|
from dagster import job
|
||||||
|
|
||||||
from data_platform.ops.elementary import elementary_generate_report
|
from data_platform.ops.elementary import (
|
||||||
|
elementary_generate_report,
|
||||||
|
elementary_run_models,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@job(description="Regenerate the Elementary data observability report.")
|
@job(
|
||||||
|
description="Ensure Elementary models exist, then regenerate the observability report."
|
||||||
|
)
|
||||||
def elementary_refresh_job():
|
def elementary_refresh_job():
|
||||||
elementary_generate_report()
|
elementary_generate_report(after=elementary_run_models())
|
||||||
|
|||||||
@@ -2,10 +2,14 @@ from data_platform.ops.check_source_freshness import (
|
|||||||
SourceFreshnessConfig,
|
SourceFreshnessConfig,
|
||||||
check_source_freshness,
|
check_source_freshness,
|
||||||
)
|
)
|
||||||
from data_platform.ops.elementary import elementary_generate_report
|
from data_platform.ops.elementary import (
|
||||||
|
elementary_generate_report,
|
||||||
|
elementary_run_models,
|
||||||
|
)
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"check_source_freshness",
|
"check_source_freshness",
|
||||||
"SourceFreshnessConfig",
|
"SourceFreshnessConfig",
|
||||||
|
"elementary_run_models",
|
||||||
"elementary_generate_report",
|
"elementary_generate_report",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,14 +1,52 @@
|
|||||||
"""Elementary report generation op."""
|
"""Elementary ops."""
|
||||||
|
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from dagster import OpExecutionContext, op
|
from dagster import In, Nothing, OpExecutionContext, op
|
||||||
|
from dagster_dbt import DbtCliResource
|
||||||
|
from sqlalchemy import create_engine, text
|
||||||
|
|
||||||
_DBT_DIR = Path(__file__).parents[2] / "dbt"
|
_DBT_DIR = Path(__file__).parents[2] / "dbt"
|
||||||
|
|
||||||
|
|
||||||
|
def _elementary_schema_exists() -> bool:
|
||||||
|
url = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
|
||||||
|
user=os.environ["POSTGRES_USER"],
|
||||||
|
password=os.environ["POSTGRES_PASSWORD"],
|
||||||
|
host=os.environ.get("POSTGRES_HOST", "localhost"),
|
||||||
|
port=os.environ.get("POSTGRES_PORT", "5432"),
|
||||||
|
dbname=os.environ["POSTGRES_DB"],
|
||||||
|
)
|
||||||
|
engine = create_engine(url)
|
||||||
|
with engine.connect() as conn:
|
||||||
|
return bool(
|
||||||
|
conn.execute(
|
||||||
|
text(
|
||||||
|
"SELECT 1 FROM information_schema.schemata WHERE schema_name = 'elementary'"
|
||||||
|
)
|
||||||
|
).scalar()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@op
|
@op
|
||||||
|
def elementary_run_models(context: OpExecutionContext, dbt: DbtCliResource) -> None:
|
||||||
|
"""Run Elementary dbt models only if the elementary schema does not exist yet."""
|
||||||
|
if _elementary_schema_exists():
|
||||||
|
context.log.info("Elementary schema already exists, skipping model run.")
|
||||||
|
return
|
||||||
|
|
||||||
|
context.log.info("Elementary schema not found, running dbt models.")
|
||||||
|
list(
|
||||||
|
dbt.cli(
|
||||||
|
["run", "--select", "elementary"],
|
||||||
|
context=context,
|
||||||
|
).stream()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@op(ins={"after": In(Nothing)})
|
||||||
def elementary_generate_report(context: OpExecutionContext) -> None:
|
def elementary_generate_report(context: OpExecutionContext) -> None:
|
||||||
"""Run edr report to regenerate the Elementary HTML report."""
|
"""Run edr report to regenerate the Elementary HTML report."""
|
||||||
cmd = [
|
cmd = [
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
echo "Installing dbt packages..."
|
||||||
|
dbt deps --profiles-dir /app/dbt --project-dir /app/dbt
|
||||||
|
|
||||||
echo "Generating dbt manifest..."
|
echo "Generating dbt manifest..."
|
||||||
dbt parse --profiles-dir /app/dbt --project-dir /app/dbt
|
dbt parse --profiles-dir /app/dbt --project-dir /app/dbt
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user