implement schema check and use automation instead of sensor

2025-07-26 17:53:12 +02:00
parent ac8759258d
commit 8d06b236b7
5 changed files with 59 additions and 64 deletions
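
Only one of the five changed files appears below. The commit message says a sensor was replaced with automation; that change is not visible in this diff, but a minimal sketch of the usual Dagster pattern looks like the following. It assumes Dagster 1.8 or later, and both asset bodies are placeholders rather than the repository's actual assets.

import dagster as dg


@dg.asset
def raw_deals() -> None:
    # Placeholder upstream asset, only here to give the automation
    # condition something to react to.
    ...


# Sketch only: instead of a custom sensor that watches for new upstream data
# and launches runs, the downstream asset declares an automation condition and
# Dagster's built-in automation condition sensor schedules it.
@dg.asset(deps=[raw_deals], automation_condition=dg.AutomationCondition.eager())
def deals() -> None:
    ...


defs = dg.Definitions(assets=[raw_deals, deals])

With the automation condition declared, enabling the default automation condition sensor in the Dagster UI is enough to launch runs when upstream assets update, so no hand-written sensor is needed.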


@@ -1,5 +1,5 @@
 import polars as pl
-from assets import deals, new_deals, works
+from assets import deals
 import dagster as dg
@@ -12,11 +12,11 @@ deals_job = dg.define_asset_job(
 def check_partitions(context: dg.OpExecutionContext):
     asset_key = "deals"
-    # Fetch the materializations for the asset key
-    materializations = context.instance.get_materialized_partitions(
+    # Fetch the materialized partitions for the asset key
+    materialized_partitions = context.instance.get_materialized_partitions(
         asset_key=dg.AssetKey(asset_key)
     )
-    ic(materializations)
+    ic(materialized_partitions)
     storage_dir = context.resources.polars_parquet_io_manager.base_dir
     ic(storage_dir)
@@ -30,7 +30,7 @@ def check_partitions(context: dg.OpExecutionContext):
         .iter_rows()
     ):
         partition = "|".join(row)
-        if partition not in materializations:
+        if partition not in materialized_partitions:
             context.log.info(f"Missing partition: {partition}")
             context.log_event(
                 dg.AssetMaterialization(asset_key=asset_key, partition=partition)
@@ -40,8 +40,3 @@ def check_partitions(context: dg.OpExecutionContext):
 @dg.job
 def check_partitions_job():
     check_partitions()
-musicbrainz_lookup_job = dg.define_asset_job(
-    "musicbrainz_lookup_job", selection=[works.key, new_deals.key]
-)
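
The schema check named in the commit message also lives in one of the files not shown here. A plausible shape for it, assuming the deals asset is stored as Parquet by the polars_parquet_io_manager that appears in the diff, is a Dagster asset check that compares the stored columns against an expected Polars schema. The column names and the Parquet path below are invented for illustration, not taken from the repository.

import dagster as dg
import polars as pl

# Hypothetical expected schema; the real column names are not visible in this commit.
EXPECTED_COLUMNS = {"artist": pl.String, "title": pl.String, "date": pl.Date}


@dg.asset_check(asset="deals")
def deals_schema_check() -> dg.AssetCheckResult:
    # Read only the schema of the stored Parquet files (the path is a placeholder).
    schema = pl.scan_parquet("storage/deals/*.parquet").collect_schema()
    missing = [name for name in EXPECTED_COLUMNS if name not in schema]
    wrong_dtype = [
        name
        for name, dtype in EXPECTED_COLUMNS.items()
        if name in schema and schema[name] != dtype
    ]
    return dg.AssetCheckResult(
        passed=not missing and not wrong_dtype,
        metadata={"missing_columns": missing, "wrong_dtypes": wrong_dtype},
    )

Registering the check alongside the asset, for example via dg.Definitions(asset_checks=[deals_schema_check], ...), makes it run and report next to materializations in the Dagster UI.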