daily parsing of raw weather
This commit is contained in:
@@ -1,10 +1,14 @@
|
|||||||
|
from collections.abc import Iterator
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import polars as pl
|
||||||
import requests_cache
|
import requests_cache
|
||||||
from config import APP
|
from config import APP
|
||||||
from partitions import (
|
from partitions import (
|
||||||
|
daily_partitions_def,
|
||||||
latitude_partitions_def,
|
latitude_partitions_def,
|
||||||
location_partitions_def,
|
location_partitions_def,
|
||||||
longitude_partitions_def,
|
longitude_partitions_def,
|
||||||
@@ -120,7 +124,7 @@ def raw_weather_batch_longitude() -> None:
|
|||||||
io_manager_key="json_io_manager",
|
io_manager_key="json_io_manager",
|
||||||
partitions_def=latitude_partitions_def,
|
partitions_def=latitude_partitions_def,
|
||||||
)
|
)
|
||||||
def raw_weather_batch_latitude(context: dg.AssetExecutionContext):
|
def raw_weather_batch_latitude(context: dg.AssetExecutionContext) -> None:
|
||||||
ic(context.resources._asdict().keys()) # contains json_io_manager
|
ic(context.resources._asdict().keys()) # contains json_io_manager
|
||||||
ic(context.partition_key)
|
ic(context.partition_key)
|
||||||
|
|
||||||
@@ -164,3 +168,40 @@ def raw_weather_batch_latitude(context: dg.AssetExecutionContext):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@asset(
|
||||||
|
name="parsed",
|
||||||
|
deps=[raw_weather],
|
||||||
|
io_manager_key="polars_parquet_io_manager",
|
||||||
|
partitions_def=daily_partitions_def,
|
||||||
|
output_required=False,
|
||||||
|
)
|
||||||
|
def parsed_weather(
|
||||||
|
context: dg.AssetExecutionContext,
|
||||||
|
) -> Iterator[dg.Output[pl.DataFrame]]:
|
||||||
|
base = (
|
||||||
|
Path(context.resources.polars_parquet_io_manager.base_dir).joinpath(
|
||||||
|
*raw_weather.key.path
|
||||||
|
)
|
||||||
|
/ context.partition_key
|
||||||
|
)
|
||||||
|
|
||||||
|
dfs = []
|
||||||
|
ic(base)
|
||||||
|
for path in Path(base).rglob("*.json"):
|
||||||
|
df = pl.read_json(path)
|
||||||
|
df = df.select(
|
||||||
|
*(
|
||||||
|
pl.lit(v).alias(k)
|
||||||
|
for k, v in zip(
|
||||||
|
("_latitude", "_longitude", "_time"),
|
||||||
|
path.relative_to(base).with_suffix("").parts,
|
||||||
|
)
|
||||||
|
),
|
||||||
|
*df.columns,
|
||||||
|
)
|
||||||
|
dfs.append(df)
|
||||||
|
|
||||||
|
if dfs:
|
||||||
|
yield dg.Output(pl.concat(dfs))
|
||||||
|
|||||||
@@ -1,5 +1,10 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
import dagster as dg
|
import dagster as dg
|
||||||
|
|
||||||
latitude_partitions_def = dg.DynamicPartitionsDefinition(name="latitudes")
|
latitude_partitions_def = dg.DynamicPartitionsDefinition(name="latitudes")
|
||||||
longitude_partitions_def = dg.DynamicPartitionsDefinition(name="longitudes")
|
longitude_partitions_def = dg.DynamicPartitionsDefinition(name="longitudes")
|
||||||
location_partitions_def = dg.DynamicPartitionsDefinition(name="locations")
|
location_partitions_def = dg.DynamicPartitionsDefinition(name="locations")
|
||||||
|
daily_partitions_def = dg.DailyPartitionsDefinition(
|
||||||
|
start_date="2025-08-01", end_offset=1, timezone=os.environ.get("TZ", "UTC")
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user