feat: add cleanup to elementary op

This commit is contained in:
Stijnvandenbroek
2026-03-12 14:00:28 +00:00
parent 5ffb474d69
commit 9868ca1127
2 changed files with 86 additions and 5 deletions

View File

@@ -10,7 +10,17 @@ from sqlalchemy import create_engine, text
_DBT_DIR = Path(__file__).parents[2] / "dbt"
def _elementary_schema_exists() -> bool:
# Retention window, in days, for rows kept in the elementary schema; rows whose
# created_at is older than this are deleted by _cleanup_old_elementary_data().
_DAYS_BACK = 3

# Tables in the "elementary" schema that _cleanup_old_elementary_data() prunes.
# Each name is interpolated directly into a DELETE statement, so keep this list
# restricted to trusted, hard-coded identifiers.
_CLEANUP_TABLES = [
    "elementary_test_results",
    "dbt_run_results",
    "dbt_invocations",
    "dbt_source_freshness_results",
]
def _get_engine():
url = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
user=os.environ["POSTGRES_USER"],
password=os.environ["POSTGRES_PASSWORD"],
@@ -18,12 +28,16 @@ def _elementary_schema_exists() -> bool:
port=os.environ.get("POSTGRES_PORT", "5432"),
dbname=os.environ["POSTGRES_DB"],
)
engine = create_engine(
return create_engine(
url,
pool_pre_ping=True,
connect_args={"connect_timeout": 10},
)
def _elementary_schema_exists() -> bool:
engine = _get_engine()
from data_platform.resources import _retry_on_operational_error
def _query():
@@ -69,9 +83,34 @@ def elementary_run_models(context: OpExecutionContext) -> None:
raise Exception(f"dbt run elementary failed with exit code {returncode}")
def _cleanup_old_elementary_data(context: OpExecutionContext) -> None:
    """Delete elementary rows older than _DAYS_BACK to prevent OOM during report generation.

    Issues one ``DELETE`` per entry in ``_CLEANUP_TABLES`` against the
    ``elementary`` schema. All deletes run inside a single ``engine.begin()``
    transaction, so a failure on any table rolls back the whole cleanup.
    Per-table and total deleted row counts are written to ``context.log``.

    Args:
        context: Op execution context; only its ``log`` attribute is used.

    Raises:
        Any exception raised by SQLAlchemy or the database driver while
        connecting or executing the deletes is propagated to the caller.
    """
    # _get_engine() builds a brand-new engine (and connection pool) on every
    # call, so it must be disposed here or its pooled connection lingers
    # until the engine object happens to be garbage-collected.
    engine = _get_engine()
    total = 0
    try:
        with engine.begin() as conn:
            for table in _CLEANUP_TABLES:
                # The table name and day count come from module-level
                # constants, not user input, hence the S608 suppression.
                result = conn.execute(
                    text(
                        f"DELETE FROM elementary.{table} "  # noqa: S608
                        f"WHERE created_at < now() - interval '{_DAYS_BACK} days'"
                    )
                )
                deleted = result.rowcount
                # DBAPI drivers may report -1 when the affected-row count is
                # unknown; only tally real, positive counts.
                if deleted > 0:
                    context.log.info(
                        f"Cleaned up {deleted} old rows from elementary.{table}"
                    )
                    total += deleted
    finally:
        engine.dispose()

    # Summarise only after the transaction has committed successfully.
    if total:
        context.log.info(f"Total rows cleaned: {total}")
    else:
        context.log.info("No old elementary data to clean up.")
@op(ins={"after": In(Nothing)})
def elementary_generate_report(context: OpExecutionContext) -> None:
"""Run edr report to regenerate the Elementary HTML report."""
_cleanup_old_elementary_data(context)
report_path = (
Path(__file__).parents[2] / "dbt" / "edr_target" / "elementary_report.html"
)