This commit is contained in:
2024-10-14 12:49:32 +02:00
parent a7d8dfdbd3
commit ebef914be6
11 changed files with 138 additions and 142 deletions

View File

@@ -3,16 +3,11 @@ from glob import glob
import polars as pl
import structlog
from dagster import (
AssetIn,
DailyPartitionsDefinition,
DimensionPartitionMapping,
IdentityPartitionMapping,
MultiPartitionMapping,
MultiPartitionsDefinition,
StaticPartitionsDefinition,
TimeWindowPartitionMapping,
asset, Failure, Field, )
from dagster import (AssetIn, DailyPartitionsDefinition,
DimensionPartitionMapping, Failure, Field,
IdentityPartitionMapping, MultiPartitionMapping,
MultiPartitionsDefinition, StaticPartitionsDefinition,
TimeWindowPartitionMapping, asset)
from app.vinyl.plato.check_plato import scrape_plato
from app.vinyl.sounds.fetch import fetch_deals
@@ -48,7 +43,9 @@ partition_mapping = MultiPartitionMapping(
metadata={
"partition_by": ["date", "source"],
},
config_schema={"import_dir": Field(str, default_value="/opt/dagster/home/storage/import")},
config_schema={
"import_dir": Field(str, default_value="/opt/dagster/home/storage/import")
},
)
def deals(context):
ic()
@@ -74,9 +71,11 @@ def deals(context):
file = sorted(files)[-1]
logger.info("Using existing CSV file", file=file)
try:
df = pl.read_csv(file)[["id", "name", "price"]]
df = pl.read_csv(file)
logger.info("Loaded CSV file", rows=len(df))
return df.with_columns(**{k: pl.lit(v) for k, v in partition_key.items()})
return df.with_columns(
**{k: pl.lit(v) for k, v in partition_key.items()}
)
except Exception as e:
logger.error("Failed to load CSV file!", error=e)
raise Failure(f"Cannot materialize for the past: {date.date()}")
@@ -91,7 +90,6 @@ def deals(context):
logger.info("Scraping Sounds")
df = fetch_deals()
ic(df.columns)
df = df[["id", "name", "price"]]
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
return pl.from_pandas(df.assign(**partition_key))