Demo of Delta Lake with a forked IO manager
This commit is contained in:
@@ -2,11 +2,15 @@ import sys
|
||||
from functools import partial
|
||||
from logging import getLogger
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from config import APP
|
||||
|
||||
import dagster as dg
|
||||
|
||||
asset = partial(dg.asset, key_prefix=APP)
|
||||
# Tags passed to the partially-applied `asset` decorator; carries the app name from config.
TAGS = {"app": APP}
|
||||
|
||||
# Module-local asset decorator: `dg.asset` pre-bound with this app's key prefix and tags,
# so individual assets only need to supply their own extra arguments.
asset = partial(dg.asset, key_prefix=APP, tags=TAGS)
|
||||
|
||||
|
||||
@asset()
|
||||
@@ -18,3 +22,27 @@ def logging(context):
|
||||
sys.__stderr__.write("This goes to stderr!\n")
|
||||
|
||||
getLogger("mylogger").info("This is an info message from mylogger")
|
||||
|
||||
|
||||
@asset(io_manager_key="delta_io_manager")
def iris_dataset() -> pa.Table:
    """Load the iris CSV from the Dagster docs site as an Arrow table.

    The CSV is read with pandas using the explicit column names listed
    below, and the resulting frame is converted to a ``pa.Table``. The
    asset is registered under the ``delta_io_manager`` IO manager key.

    Returns:
        The iris data as a ``pa.Table`` with five named columns.
    """
    source_url = "https://docs.dagster.io/assets/iris.csv"
    column_names = [
        "sepal_length_cm",
        "sepal_width_cm",
        "petal_length_cm",
        "petal_width_cm",
        "species",
    ]
    frame = pd.read_csv(source_url, names=column_names)
    return pa.Table.from_pandas(frame)
|
||||
|
||||
|
||||
@asset(
    io_manager_key="delta_io_manager", ins={"table": dg.AssetIn(key=iris_dataset.key)}
)
def iris_cleaned(table: pa.Table) -> pa.Table:
    """Return the iris table with incomplete and duplicate rows removed.

    Args:
        table: The upstream ``iris_dataset`` asset, supplied as an Arrow table.

    Returns:
        A ``pa.Table`` containing only the data columns of the rows that
        have no missing values and are not exact duplicates of an earlier row.
    """
    df = table.to_pandas()
    result_df = df.dropna().drop_duplicates()
    # Filtering rows can leave a non-default pandas index behind. With the
    # default ``preserve_index=None`` pyarrow stores any index that is not a
    # RangeIndex as an extra ``__index_level_0__`` column, which would leak
    # into the stored table's schema. Drop the index explicitly instead.
    return pa.Table.from_pandas(result_df, preserve_index=False)
|
||||
|
||||
Reference in New Issue
Block a user