Use Polars and DuckDB for lazy processing

This commit is contained in:
2024-10-14 15:23:49 +02:00
parent 1e0528bdfb
commit f9c83d29b3
2 changed files with 55 additions and 57 deletions

View File

@@ -23,8 +23,7 @@ vinyl = Definitions(
assets=[deals, new_deals, works],
resources={
"polars_parquet_io_manager": PolarsParquetIOManager(),
"duckdb_io_manager": PandasDuckDBIOManager(database="vinyl.duckdb"),
# "duckdb": DuckDBResource(database="vinyl.duckdb")
"duckdb_io_manager": PandasDuckDBIOManager(database="vinyl"),
},
jobs=[deals_job, check_partititions_job, musicbrainz_lookup_job],
schedules=[deals_schedule],