rewrite parsing of deals

This commit is contained in:
2025-07-26 16:28:26 +02:00
parent 8a80adcd27
commit fb2e90d47d
11 changed files with 238 additions and 114 deletions

0
shared/src/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,82 @@
from typing import Mapping
import dagster as dg
def get_dimension_names(
    context: dg.OpExecutionContext, input_name: str = "partitions"
) -> list[str]:
    """
    List the dimension names of the partitions definition behind an input.

    Args:
        context: The Dagster execution context.
        input_name: Name of the input whose partitions definition is inspected
            (default is "partitions").

    Returns:
        The name of each entry in the definition's ``partitions_defs``, in the
        order that attribute yields them.

    Raises:
        NotImplementedError: If the input's partitions definition is not a
            ``dg.MultiPartitionsDefinition``.
    """
    definition = context.asset_partitions_def_for_input(input_name)
    # Only multi-dimensional definitions carry named dimensions; reject the rest.
    if not isinstance(definition, dg.MultiPartitionsDefinition):
        raise NotImplementedError("Only MultiPartitionsDefinition is supported.")
    return [dimension.name for dimension in definition.partitions_defs]
def parse_coalesced_partition_key(
    coalesced_key: str, dimension_names: list[str]
) -> dict[str, str]:
    """
    Split a "|"-delimited partition key and label each part with its dimension.

    Args:
        coalesced_key: The coalesced partition key string; its parts are
            separated by "|".
        dimension_names: Dimension names, matched positionally to the parts of
            the key.

    Returns:
        A dictionary mapping each dimension name to the part of the key found
        at the same position.

    Raises:
        ValueError: If the number of parts in the key differs from the number
            of dimension names.
    """
    values = coalesced_key.split("|")
    if len(values) == len(dimension_names):
        # Lengths agree, so the positional pairing loses nothing.
        return {name: value for name, value in zip(dimension_names, values)}
    raise ValueError("Mismatch between dimension names and partition key parts")
def get_partition_keys(context: dg.OpExecutionContext) -> Mapping[str, str]:
    """
    Return the per-dimension values of the context's multi-partition key.

    Args:
        context: The Dagster execution context.

    Returns:
        The ``keys_by_dimension`` mapping of the context's partition key
        (dimension name -> value for that dimension).

    Raises:
        ValueError: If the context's partition key is not a
            ``dg.MultiPartitionKey``.
    """
    key = context.partition_key
    if isinstance(key, dg.MultiPartitionKey):
        return key.keys_by_dimension
    # Report both the offending type and value to make misconfiguration obvious.
    raise ValueError(
        f"Expected MultiPartitionKey, got {type(context.partition_key)}: {context.partition_key}"
    )
def parse_partition_keys(
    context: dg.OpExecutionContext, input_name: str = "partitions"
) -> dict[str, dict[str, str]]:
    """
    Parse every partition key of an input into its dimension values.

    Args:
        context: The Dagster execution context.
        input_name: Name of the input whose partition keys are parsed
            (default is "partitions").

    Returns:
        A dictionary keyed by each partition key of the input; each value is
        the result of ``parse_coalesced_partition_key`` for that key.
    """
    # Resolve the dimension names once; they are the same for every key.
    names = get_dimension_names(context, input_name)
    parsed: dict[str, dict[str, str]] = {}
    for raw_key in context.asset_partition_keys_for_input(input_name):
        parsed[raw_key] = parse_coalesced_partition_key(raw_key, names)
    return parsed