scrape platenzaak
This commit is contained in:
@@ -12,17 +12,19 @@ from dagster_polars.patito import patito_model_to_dagster_type
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from models import Deal
|
||||
from partitions import daily_partitions_def, multi_partitions_def
|
||||
from platenzaak.scrape import scrape as scrape_platenzaak
|
||||
from plato.parse import parse as parse_plato
|
||||
from plato.scrape import scrape as scrape_plato
|
||||
from shared.utils import get_partition_keys, parse_partition_keys
|
||||
from sounds.parse import parse as parse_sounds
|
||||
from sounds.scrape import scrape as scrape_sounds
|
||||
from structlog.stdlib import BoundLogger
|
||||
from utils.email import EmailService
|
||||
|
||||
import dagster as dg
|
||||
|
||||
asset = partial(dg.asset, key_prefix=APP)
|
||||
logger = structlog.get_logger()
|
||||
logger: BoundLogger = structlog.get_logger()
|
||||
|
||||
|
||||
@asset(
|
||||
@@ -75,10 +77,14 @@ def deals(context: dg.AssetExecutionContext) -> pl.DataFrame:
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
if source == "platenzaak":
|
||||
logger.info("Scraping Platenzaak")
|
||||
df = scrape_platenzaak(logger=logger)
|
||||
logger.info("Scraped Platenzaak", rows=len(df), head=df.head().to_markdown())
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
|
||||
return pl.DataFrame(
|
||||
[{"date": context.partition_key, "data": f"Data for {context.partition_key}"}]
|
||||
)
|
||||
raise NotImplementedError(f"No implementation for source {source}")
|
||||
|
||||
|
||||
@asset(
|
||||
|
||||
Reference in New Issue
Block a user