Files
dagster/src/app/vinyl/jobs.py
2024-10-14 12:49:32 +02:00

48 lines
1.3 KiB
Python

from dagster import (AssetKey, AssetMaterialization, OpExecutionContext,
define_asset_job, job, op)
from .assets import deals
# Asset job that selects only the `deals` asset and reuses that asset's own
# partitions definition.
deals_job = define_asset_job(
    name="deals_job",
    selection=[deals],
    partitions_def=deals.partitions_def,
)
@op
def check_partititions(context: OpExecutionContext):
    """Record materialization events for ``deals`` partitions found on disk.

    Asks the Dagster instance which partitions of the ``deals`` asset it
    already reports as materialized, scans the parquet files under the
    instance storage directory for the distinct ``(date, source)`` pairs, and
    logs an ``AssetMaterialization`` for every ``date|source`` partition key
    that is not in the instance's set.

    Args:
        context: Execution context providing the Dagster instance and logger.
    """
    asset_key = "deals"

    # NOTE(review): this unconditionally records one hard-coded partition on
    # every run; it looks like leftover test scaffolding -- confirm and remove.
    context.log_event(
        AssetMaterialization(asset_key=asset_key, partition="2024-09-30|sounds")
    )

    # Partitions the instance already reports as materialized for this asset.
    materializations = context.instance.get_materialized_partitions(
        asset_key=AssetKey(asset_key)
    )
    context.log.info("Existing partitions", extra=dict(partitions=materializations))

    # Imported locally, as in the original, so the module loads without polars.
    import polars as pl

    storage_dir = context.instance.storage_directory()
    # Previously a bare `ic(storage_dir)` debug call; `ic` is not imported in
    # the visible module, so the value is routed through the op logger instead.
    context.log.debug(f"Storage directory: {storage_dir}")

    distinct_pairs = (
        pl.scan_parquet(f"{storage_dir}/{asset_key}/*/*.parquet")
        .select(["date", "source"])
        .unique()
        .collect()
    )

    for row in distinct_pairs.iter_rows():
        # Stringify each value so a non-string column (e.g. a date-typed
        # `date`) does not make str.join raise TypeError; strings are unchanged.
        partition = "|".join(str(value) for value in row)
        if partition not in materializations:
            context.log.info(f"Missing partition: {partition}")
            context.log_event(
                AssetMaterialization(asset_key=asset_key, partition=partition)
            )
# Job whose only step is the `check_partititions` op defined in this module.
@job
def check_partititions_job():
    check_partititions()