Implement schema check and use automation instead of a sensor

This commit is contained in:
2025-07-26 17:53:12 +02:00
parent ac8759258d
commit 8d06b236b7
5 changed files with 59 additions and 64 deletions

View File

@@ -1,16 +1,15 @@
import logging
import warnings
from datetime import datetime
from typing import Any
from assets import cleaned_deals, deals, new_deals
from assets import cleaned_deals, deals, works
from dagster_polars import PolarsParquetIOManager
from definitions import definitions
from jobs import check_partitions_job
import dagster as dg
warnings.filterwarnings("ignore", category=dg.ExperimentalWarning)
# warnings.filterwarnings("ignore", category=dg.Ex)
logging.getLogger().setLevel(logging.INFO)
@@ -35,10 +34,10 @@ def test_deals(resources: dict[str, Any], source="sounds", date: str = None):
if __name__ == "__main__":
run = 4
run = 3
resources = {
"polars_parquet_io_manager": PolarsParquetIOManager(
base_dir="/opt/dagster/storage"
base_dir="/opt/dagster/storage/vinyl"
)
}
source = "sounds" # or "plato"
@@ -48,19 +47,19 @@ if __name__ == "__main__":
check_partitions_job.execute_in_process(resources=resources)
case 2:
test_deals(resources, source=source)
case 3:
dg.materialize(
assets=definitions.assets,
selection=[new_deals.key],
partition_key=today_str(),
resources=resources,
)
case 4:
dg.materialize(
assets=definitions.assets,
selection=[cleaned_deals.key],
partition_key=f"{today_str()}|{source}",
resources=resources,
)
case 4:
dg.materialize(
assets=definitions.assets,
selection=[works.key],
resources=resources,
)
case _:
raise ValueError("Invalid run number")
raise ValueError(f"Invalid run number: {run}!")