rewrite parsing of deals

This commit is contained in:
2025-07-26 16:28:26 +02:00
parent 8a80adcd27
commit fb2e90d47d
11 changed files with 238 additions and 114 deletions

View File

@@ -1,30 +1,30 @@
import logging
import warnings
from datetime import datetime
from typing import Any
from assets import deals
from assets import cleaned_deals, deals, new_deals
from dagster_polars import PolarsParquetIOManager
from jobs import check_partititions_job
from definitions import definitions
from jobs import check_partitions_job
from dagster import materialize
import dagster as dg
# Ignore every warning of category UserWarning and dagster's ExperimentalWarning.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=dg.ExperimentalWarning)
# Set the root logger's level to INFO.
logging.getLogger().setLevel(logging.INFO)
# Module-level resource mapping: a single PolarsParquetIOManager registered
# under the key "polars_parquet_io_manager", with base_dir /opt/dagster/storage.
# NOTE(review): the `__main__` block further down builds a dict with the same
# key and base_dir and passes it explicitly; this module-level copy looks like
# the pre-change definition from a rendered diff — confirm before relying on it.
resources = {
"polars_parquet_io_manager": PolarsParquetIOManager(base_dir="/opt/dagster/storage")
}
def today_str():
    """Format the current local date as a ``YYYY-MM-DD`` string."""
    now = datetime.today()
    return now.strftime("%Y-%m-%d")
def test_deals(source="sounds", date: str = None):
if not date:
today = datetime.today().strftime("%Y-%m-%d")
date = today
result = materialize(
[deals],
partition_key=f"{date}|{source}",
def test_deals(resources: dict[str, Any], source="sounds", date: str = None):
result = dg.materialize(
assets=definitions.assets,
selection=[deals.key],
partition_key=f"{date or today_str()}|{source}",
resources=resources,
run_config={
"loggers": {"console": {"config": {"log_level": "ERROR"}}},
@@ -32,9 +32,35 @@ def test_deals(source="sounds", date: str = None):
},
)
assert result.success
ic(result.asset_value)
# Script entry point: runs one of several manual Dagster executions, chosen by
# the hard-coded `run` value below.
if __name__ == "__main__":
# test_deals(source="plato")
# NOTE(review): `check_partititions_job` is spelled differently from the
# `check_partitions_job` used in case 1, and this call sits before the `match`,
# so it is not gated by `run`. It looks like a removed line from a rendered
# diff — confirm whether it should still be here.
check_partititions_job.execute_in_process()
# Selects which `case` of the `match` below executes (1-4; anything else raises).
run = 4
# Resource mapping handed to every run below: one PolarsParquetIOManager with
# base_dir /opt/dagster/storage.
resources = {
"polars_parquet_io_manager": PolarsParquetIOManager(
base_dir="/opt/dagster/storage"
)
}
# Source name used in the partition keys of cases 2 and 4.
source = "sounds" # or "plato"
match run:
case 1:
# Execute the partition-check job in process with the resources above.
check_partitions_job.execute_in_process(resources=resources)
case 2:
# Delegate to test_deals for the chosen source; no date is passed.
test_deals(resources, source=source)
case 3:
# Materialize only `new_deals`; the partition key is today's date alone.
dg.materialize(
assets=definitions.assets,
selection=[new_deals.key],
partition_key=today_str(),
resources=resources,
)
case 4:
# Materialize only `cleaned_deals`; the partition key is "<today>|<source>".
dg.materialize(
assets=definitions.assets,
selection=[cleaned_deals.key],
partition_key=f"{today_str()}|{source}",
resources=resources,
)
case _:
# Any other `run` value is rejected.
raise ValueError("Invalid run number")