refactor to allow for multiple code locations

This commit is contained in:
2025-07-20 19:49:30 +02:00
parent 9b8cfabee5
commit fd73e1367c
40 changed files with 161 additions and 628 deletions

58
apps/vinyl/src/jobs.py Normal file
View File

@@ -0,0 +1,58 @@
from assets import deals, new_deals, works
from dagster import (
AssetKey,
AssetMaterialization,
OpExecutionContext,
define_asset_job,
job,
op,
)
# Asset job that targets only the `deals` asset and reuses that asset's own
# partitions definition, so the job is partitioned exactly like the asset.
deals_job = define_asset_job(
    name="deals_job",
    selection=[deals],
    partitions_def=deals.partitions_def,
)
@op
def check_partititions(context: OpExecutionContext):
    """Record materializations for `deals` partitions that only exist on disk.

    Reads the distinct ``(date, source)`` pairs from the parquet files found
    under ``<instance storage dir>/deals/*/*.parquet``, builds a
    ``"date|source"`` partition key for each pair, and logs an
    ``AssetMaterialization`` event for every key that the instance does not
    already report as materialized.

    Args:
        context: Op execution context; provides the Dagster instance, the
            logger and the event-logging hook used below.
    """
    asset_key = "deals"

    # NOTE(review): this unconditionally logs a materialization for one
    # hard-coded partition on every run. It looks like a leftover seed/debug
    # event -- confirm whether it is still wanted before removing it.
    context.log_event(
        AssetMaterialization(asset_key=asset_key, partition="2024-09-30|sounds")
    )

    # Partitions the instance already knows about for this asset.
    materializations = context.instance.get_materialized_partitions(
        asset_key=AssetKey(asset_key)
    )
    context.log.info("Existing partitions", extra=dict(partitions=materializations))

    # Imported lazily so the module can be loaded without polars installed.
    import polars as pl

    storage_dir = context.instance.storage_directory()
    # Previously `ic(storage_dir)`: `ic` is never imported in this module, so
    # the call raised NameError. Use the op logger for the debug output.
    context.log.debug(f"Storage directory: {storage_dir}")

    distinct_rows = (
        pl.scan_parquet(f"{storage_dir}/{asset_key}/*/*.parquet")
        .select(["date", "source"])
        .unique()
        .collect()
        .iter_rows()
    )
    for row in distinct_rows:
        if any(value is None for value in row):
            # A null date/source cannot form a valid partition key.
            context.log.warning(f"Skipping row with null partition value: {row}")
            continue

        # Stringify each value: `str.join` rejects non-string items, which
        # would fail if the `date` column is stored as a date type.
        partition = "|".join(str(value) for value in row)
        if partition in materializations:
            continue

        context.log.info(f"Missing partition: {partition}")
        context.log_event(
            AssetMaterialization(asset_key=asset_key, partition=partition)
        )
@job
def check_partititions_job():
    # Single-op job: its only step is the `check_partititions` op, which logs
    # materialization events for `deals` partitions found in storage.
    check_partititions()
# Asset job that selects the `works` and `new_deals` assets together.
musicbrainz_lookup_job = define_asset_job(
    name="musicbrainz_lookup_job",
    selection=[works, new_deals],
)