feat: add cleanup to elementary op
This commit is contained in:
@@ -10,7 +10,17 @@ from sqlalchemy import create_engine, text
|
||||
_DBT_DIR = Path(__file__).parents[2] / "dbt"
|
||||
|
||||
|
||||
def _elementary_schema_exists() -> bool:
|
||||
# Retention window, in days: rows whose created_at is older than this are
# deleted by _cleanup_old_elementary_data.
_DAYS_BACK = 3

# Tables in the `elementary` schema that _cleanup_old_elementary_data prunes.
# Each one is filtered on a `created_at` column by the cleanup query.
_CLEANUP_TABLES = [
    "elementary_test_results",
    "dbt_run_results",
    "dbt_invocations",
    "dbt_source_freshness_results",
]
|
||||
|
||||
|
||||
def _get_engine():
|
||||
url = "postgresql://{user}:{password}@{host}:{port}/{dbname}".format(
|
||||
user=os.environ["POSTGRES_USER"],
|
||||
password=os.environ["POSTGRES_PASSWORD"],
|
||||
@@ -18,12 +28,16 @@ def _elementary_schema_exists() -> bool:
|
||||
port=os.environ.get("POSTGRES_PORT", "5432"),
|
||||
dbname=os.environ["POSTGRES_DB"],
|
||||
)
|
||||
engine = create_engine(
|
||||
return create_engine(
|
||||
url,
|
||||
pool_pre_ping=True,
|
||||
connect_args={"connect_timeout": 10},
|
||||
)
|
||||
|
||||
|
||||
def _elementary_schema_exists() -> bool:
|
||||
engine = _get_engine()
|
||||
|
||||
from data_platform.resources import _retry_on_operational_error
|
||||
|
||||
def _query():
|
||||
@@ -69,9 +83,34 @@ def elementary_run_models(context: OpExecutionContext) -> None:
|
||||
raise Exception(f"dbt run elementary failed with exit code {returncode}")
|
||||
|
||||
|
||||
def _cleanup_old_elementary_data(context: OpExecutionContext) -> None:
    """Delete elementary rows older than _DAYS_BACK to prevent OOM during report generation.

    Issues one ``DELETE`` per table in ``_CLEANUP_TABLES`` against the
    ``elementary`` schema inside a single transaction, then logs the number of
    rows removed per table and in total.

    Args:
        context: Op execution context; only ``context.log`` is used.

    Raises:
        Whatever the database driver / SQLAlchemy raises if a ``DELETE`` fails;
        in that case the transaction is rolled back and nothing is deleted.
    """
    engine = _get_engine()
    total = 0
    try:
        # engine.begin() commits on normal exit and rolls back on an exception,
        # so the tables are pruned all-or-nothing.
        with engine.begin() as conn:
            for table in _CLEANUP_TABLES:
                # Table names and the day count come from module constants,
                # not from external input, hence the S608 suppression.
                result = conn.execute(
                    text(
                        f"DELETE FROM elementary.{table} "  # noqa: S608
                        f"WHERE created_at < now() - interval '{_DAYS_BACK} days'"
                    )
                )
                deleted = result.rowcount
                # A DBAPI driver may report -1 (or None) when the count is
                # unknown; only count and log genuinely positive values.
                if deleted and deleted > 0:
                    context.log.info(
                        f"Cleaned up {deleted} old rows from elementary.{table}"
                    )
                    total += deleted
    finally:
        # _get_engine() builds a new engine on every call, so release its
        # connection pool here instead of leaving connections open.
        engine.dispose()

    if total:
        context.log.info(f"Total rows cleaned: {total}")
    else:
        context.log.info("No old elementary data to clean up.")
|
||||
|
||||
|
||||
@op(ins={"after": In(Nothing)})
|
||||
def elementary_generate_report(context: OpExecutionContext) -> None:
|
||||
"""Run edr report to regenerate the Elementary HTML report."""
|
||||
_cleanup_old_elementary_data(context)
|
||||
|
||||
report_path = (
|
||||
Path(__file__).parents[2] / "dbt" / "edr_target" / "elementary_report.html"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user