parse platenzaak deals
This commit is contained in:
@@ -12,6 +12,7 @@ from dagster_polars.patito import patito_model_to_dagster_type
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from models import Deal
|
||||
from partitions import daily_partitions_def, multi_partitions_def
|
||||
from platenzaak.parse import parse as parse_platenzaak
|
||||
from platenzaak.scrape import scrape as scrape_platenzaak
|
||||
from plato.parse import parse as parse_plato
|
||||
from plato.scrape import scrape as scrape_plato
|
||||
@@ -65,26 +66,24 @@ def deals(context: dg.AssetExecutionContext) -> pl.DataFrame:
|
||||
logger.error("Failed to load CSV file!", error=e)
|
||||
raise dg.Failure(f"Cannot materialize for the past: {date.date()}")
|
||||
|
||||
if source == "plato":
|
||||
logger.info("Scraping Plato")
|
||||
df = scrape_plato()
|
||||
logger.info("Scraped Plato", rows=len(df), head=df.head().to_markdown())
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
if source == "sounds":
|
||||
logger.info("Scraping Sounds")
|
||||
df = scrape_sounds()
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
if source == "platenzaak":
|
||||
logger.info("Scraping Platenzaak")
|
||||
df = scrape_platenzaak(logger=logger)
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
match source:
|
||||
case "plato":
|
||||
logger.info("Scraping Plato")
|
||||
df = scrape_plato()
|
||||
logger.info("Scraped Plato", rows=len(df), head=df.head().to_markdown())
|
||||
case "sounds":
|
||||
logger.info("Scraping Sounds")
|
||||
df = scrape_sounds()
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
case "platenzaak":
|
||||
logger.info("Scraping Platenzaak")
|
||||
df = scrape_platenzaak(logger=logger)
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
case _:
|
||||
raise ValueError(f"Unknown source: {source}!")
|
||||
|
||||
raise NotImplementedError(f"No implementation for source {source}")
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
|
||||
|
||||
@asset(
|
||||
@@ -111,9 +110,10 @@ def cleaned_deals(
|
||||
parsed_df = parse_plato(df)
|
||||
case "sounds":
|
||||
parsed_df = parse_sounds(df)
|
||||
case "platenzaak":
|
||||
parsed_df = parse_platenzaak(df)
|
||||
case _:
|
||||
context.log.warning(f"Unknown source: {source}!")
|
||||
return
|
||||
raise ValueError(f"Unknown source: {source}!")
|
||||
|
||||
ic(parsed_df.collect_schema())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user