48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
from dagster import (AssetKey, AssetMaterialization, OpExecutionContext,
|
|
define_asset_job, job, op)
|
|
|
|
from .assets import deals
|
|
|
|
# Asset job targeting the `deals` asset. The job reuses the partitions
# definition attached to the asset itself rather than declaring its own.
deals_job = define_asset_job(
    name="deals_job",
    selection=[deals],
    partitions_def=deals.partitions_def,
)
|
|
|
|
|
|
@op
def check_partititions(context: OpExecutionContext):
    """Log materializations for ``deals`` partitions found on disk but not yet recorded.

    Reads the distinct ``(date, source)`` pairs from the parquet files under
    ``<instance storage directory>/deals/*/*.parquet``, builds a ``"date|source"``
    partition key for each pair, and logs an ``AssetMaterialization`` event for
    every key that the instance does not already report as materialized.

    Args:
        context: Execution context supplied by Dagster; used for logging,
            for emitting events, and for access to the Dagster instance.
    """
    # Replace with your asset/job name
    asset_key = "deals"

    # NOTE(review): this hard-coded partition is materialized unconditionally
    # on every run; it looks like a leftover from manual testing -- confirm
    # whether it should stay.
    context.log_event(
        AssetMaterialization(asset_key=asset_key, partition="2024-09-30|sounds")
    )

    # Fetch the partitions the instance already knows as materialized.
    materializations = context.instance.get_materialized_partitions(
        asset_key=AssetKey(asset_key)
    )
    context.log.info("Existing partitions", extra=dict(partitions=materializations))

    # Imported locally so polars is only required when this op actually runs.
    import polars as pl

    storage_dir = context.instance.storage_directory()
    # Previously an un-imported `ic(...)` debug call; use the op logger instead.
    context.log.debug(f"Storage directory: {storage_dir}")

    on_disk = (
        pl.scan_parquet(f"{storage_dir}/{asset_key}/*/*.parquet")
        .select(["date", "source"])
        .unique()
        .collect()
    )

    for row in on_disk.iter_rows():
        # A null component cannot form a meaningful partition key.
        if any(value is None for value in row):
            context.log.warning(f"Skipping row with null partition component: {row}")
            continue

        # Stringify each value: the columns are not guaranteed to be `str`
        # (e.g. a Date column yields `datetime.date`), and `str.join` would
        # raise TypeError on non-string items.
        partition = "|".join(str(value) for value in row)
        if partition in materializations:
            continue

        context.log.info(f"Missing partition: {partition}")
        context.log_event(
            AssetMaterialization(asset_key=asset_key, partition=partition)
        )
|
|
|
|
|
|
# Job consisting of the single `check_partititions` op.
@job
def check_partititions_job():
    # The op declares no inputs besides its execution context, so it is
    # invoked here without arguments.
    check_partititions()
|