handle missing partitions
This commit is contained in:
@@ -84,14 +84,16 @@ def deals(context: dg.AssetExecutionContext) -> pl.DataFrame:
|
||||
io_manager_key="polars_parquet_io_manager",
|
||||
partitions_def=deals.partitions_def,
|
||||
ins={"df": dg.AssetIn(key=deals.key)},
|
||||
automation_condition=dg.AutomationCondition.on_missing().without(
|
||||
dg.AutomationCondition.in_latest_time_window()
|
||||
),
|
||||
automation_condition=dg.AutomationCondition.eager(),
|
||||
output_required=False,
|
||||
)
|
||||
def cleaned_deals(
|
||||
context: dg.AssetExecutionContext, df: pl.LazyFrame
|
||||
) -> Deal.DataFrame:
|
||||
context: dg.AssetExecutionContext, df: pl.LazyFrame | None
|
||||
) -> Iterator[dg.Output[Deal.DataFrame]]:
|
||||
"""Clean and parse deals from the raw source tables."""
|
||||
if df is None:
|
||||
return
|
||||
|
||||
ic()
|
||||
partition_keys = get_partition_keys(context)
|
||||
ic(partition_keys)
|
||||
@@ -104,27 +106,27 @@ def cleaned_deals(
|
||||
parsed_df = parse_sounds(df)
|
||||
case _:
|
||||
context.log.warning(f"Unknown source: {source}!")
|
||||
return Deal.DataFrame()
|
||||
return
|
||||
|
||||
ic(parsed_df.collect_schema())
|
||||
|
||||
# Deduplicate and sort the DataFrame
|
||||
columns = ["source", "id", "artist", "title", "price"]
|
||||
return Deal.DataFrame(
|
||||
parsed_df.sort("date", descending=True)
|
||||
.unique(subset=columns, keep="first")
|
||||
.sort("date", descending=False)
|
||||
.select(*columns, "date", "release", "url")
|
||||
.collect()
|
||||
yield dg.Output(
|
||||
Deal.DataFrame(
|
||||
parsed_df.sort("date", descending=True)
|
||||
.unique(subset=columns, keep="first")
|
||||
.sort("date", descending=False)
|
||||
.select(*columns, "date", "release", "url")
|
||||
.collect()
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@asset(
|
||||
deps=[cleaned_deals],
|
||||
io_manager_key="polars_parquet_io_manager",
|
||||
automation_condition=dg.AutomationCondition.on_missing().without(
|
||||
dg.AutomationCondition.in_latest_time_window()
|
||||
),
|
||||
automation_condition=dg.AutomationCondition.eager(),
|
||||
)
|
||||
def works(context: dg.AssetExecutionContext) -> pl.DataFrame | None:
|
||||
"""Aggregate works from cleaned deals."""
|
||||
@@ -167,15 +169,21 @@ def works(context: dg.AssetExecutionContext) -> pl.DataFrame | None:
|
||||
automation_condition=dg.AutomationCondition.eager(),
|
||||
)
|
||||
def new_deals(
|
||||
context: dg.AssetExecutionContext, partitions: dict[str, pl.LazyFrame]
|
||||
context: dg.AssetExecutionContext, partitions: dict[str, pl.LazyFrame | None]
|
||||
) -> Iterator[dg.Output[Deal.DataFrame]]:
|
||||
"""Fetch new deals from all sources."""
|
||||
ic()
|
||||
partition_keys = get_partition_keys(context)
|
||||
parsed_partition_keys = parse_partition_keys(context, "partitions")
|
||||
ic(partitions.keys())
|
||||
if not (partitions := {k: v for k, v in partitions.items() if v is not None}):
|
||||
return
|
||||
|
||||
ic(partitions.keys())
|
||||
partition_keys = get_partition_keys(context)
|
||||
ic(partition_keys)
|
||||
|
||||
parsed_partition_keys = parse_partition_keys(context, "partitions")
|
||||
ic(parsed_partition_keys)
|
||||
|
||||
if len(partition_keys := sorted(partitions.keys())) < 2:
|
||||
context.log.warning("Not enough partitions to fetch new deals!")
|
||||
|
||||
@@ -245,7 +253,7 @@ def good_deals(
|
||||
]
|
||||
|
||||
# Render HTML from Jinja template
|
||||
env = Environment(loader=FileSystemLoader(".."))
|
||||
env = Environment(loader=FileSystemLoader(f"/apps/{APP}"))
|
||||
template = env.get_template("email.html")
|
||||
html_content = template.render(deals=deals)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user