Compare commits
31 Commits
030424e124
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 87924620fd | |||
| b15aaaa0dc | |||
| 3f99f354de | |||
| 204565118b | |||
| aa4a2fa5b1 | |||
| af913e258a | |||
| 7a8f15b1d6 | |||
| e9ad1677ef | |||
| 2a4da9abb9 | |||
| a9b9197150 | |||
| 883ecf86be | |||
| 7a600f6264 | |||
| 127a773c82 | |||
| 67a7e2dacf | |||
| fc6f120c53 | |||
| 55e8b31223 | |||
| 1d9bd68612 | |||
| e0cda85d20 | |||
| da55030498 | |||
| 316fe03be9 | |||
| bf537c86a4 | |||
| d2e34bca1c | |||
| 65593e5421 | |||
| 4242638818 | |||
| 4593b97bc2 | |||
| a0a0bbd110 | |||
| 2b4e34ec2f | |||
| eaf469d68f | |||
| 3c7f46fb4f | |||
| 866e190ed0 | |||
| 968d5c34de |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -9,4 +9,6 @@ src/history/
|
||||
src/logs/
|
||||
src/schedules/
|
||||
db/
|
||||
logs/
|
||||
.DS_Store
|
||||
*requirements.txt
|
||||
|
||||
11
apps/backup/Dockerfile
Normal file
11
apps/backup/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
||||
# Backup code-location image: starts from the shared base image and adds the
# borg client plus the OpenSSH client it uses to reach the remote repositories.
FROM dagster-code-backup-base

# Install the tools and remove the apt caches in the same layer.
RUN apt-get update \
    && apt-get install --no-install-recommends --yes \
        borgbackup openssh-client \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# SSH directory for root, accessible by the owner only.
RUN mkdir -p /root/.ssh && chmod 0700 /root/.ssh/
# NOTE(review): this bakes a private key into an image layer; anyone who can
# pull the image can read it. Consider a build secret or a runtime mount.
COPY --chmod=0600 id_rsa /root/.ssh/
# COPY instead of ADD: the source is a plain local file, so none of ADD's
# extra behaviour (URL fetch, archive extraction) is wanted here.
COPY --chmod=0600 ssh_config /root/.ssh/config
|
||||
146
apps/backup/src/assets.py
Normal file
146
apps/backup/src/assets.py
Normal file
@@ -0,0 +1,146 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import structlog
|
||||
from config import APP, BORG_HOST, BORG_ROOT
|
||||
from partitions import borg_repo_partitions_def, daily_partitions_def
|
||||
from shared.utils import get_partition_keys
|
||||
|
||||
import dagster as dg
|
||||
|
||||
asset = partial(dg.asset, key_prefix=APP)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
# Placeholder asset partitioned by (date, repo). Its body does nothing; in this
# module the materializations for its key are logged from `borg_repo`.
@asset(
    partitions_def=dg.MultiPartitionsDefinition(
        {"date": daily_partitions_def, "repo": borg_repo_partitions_def}
    ),
)
def borg_archive(context: dg.AssetExecutionContext) -> None:
    return None
|
||||
|
||||
|
||||
@asset(
    deps=[borg_archive],
    partitions_def=dg.MultiPartitionsDefinition(
        {
            "date": daily_partitions_def,
            "repo": borg_repo_partitions_def,
        }
    ),
    automation_condition=dg.AutomationCondition.eager(),
)
def borg_archive_info(context: dg.AssetExecutionContext) -> dg.Output[None]:
    """Run ``borg info --json`` for one archive and record its stats as metadata.

    The archive location is built from ``BORG_HOST``, ``BORG_ROOT`` and the
    ``repo`` / ``date`` entries returned by ``get_partition_keys(context)``.

    Returns:
        An ``Output`` with no value whose metadata carries the first reported
        archive's start/end timestamps, duration, sizes and file count.

    Raises:
        subprocess.CalledProcessError: if ``borg info`` exits non-zero.
        dg.Failure: if borg reports no archive for the location.
    """
    partition_keys = get_partition_keys(context)
    location = (
        f"ssh://{BORG_HOST}{BORG_ROOT}{partition_keys['repo']}"
        f"::{partition_keys['date']}"
    )
    # Logged through structlog rather than icecream's `ic`, which only exists
    # as a builtin after definitions.py has called `install()`.
    logger.debug(
        "Querying borg archive", partition_keys=partition_keys, location=location
    )

    try:
        result = subprocess.run(
            ["borg", "info", "--json", location],
            capture_output=True,
            text=True,
            check=True,
            # Extend the inherited environment instead of replacing it, so
            # PATH, HOME, TZ etc. stay available to borg and ssh.
            env={**os.environ, "BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK": "yes"},
        )
    except subprocess.CalledProcessError as e:
        logger.error("borg info failed", exc_info=e, code=e.returncode)
        sys.stderr.write("borg info failed\n" + (e.stderr or ""))
        # Re-raise: continuing would hit an unbound `result` and hide the
        # actual borg failure behind an UnboundLocalError.
        raise

    archives = json.loads(result.stdout).get("archives", [])
    if not archives:
        raise dg.Failure(description=f"borg info returned no archive for {location}")
    archive = archives[0]
    stats = archive["stats"]

    def parse_date(date_str: str, tz: str | None = None) -> dg.MetadataValue:
        """Parse an ISO timestamp and stamp it with `tz` (default: $TZ, else CET)."""
        zone = ZoneInfo(tz or os.environ.get("TZ", "CET"))
        return dg.MetadataValue.timestamp(
            datetime.fromisoformat(date_str).replace(tzinfo=zone)
        )

    return dg.Output(
        None,
        metadata={
            "start": parse_date(archive["start"]),
            "end": parse_date(archive["end"]),
            "duration": dg.MetadataValue.float(archive["duration"]),
            "compressed_size": dg.MetadataValue.int(stats["compressed_size"]),
            "deduplicated_size": dg.MetadataValue.int(stats["deduplicated_size"]),
            "nfiles": dg.MetadataValue.int(stats["nfiles"]),
            "original_size": dg.MetadataValue.int(stats["original_size"]),
        },
    )
|
||||
|
||||
# now run borg info ssh://shuttle/mnt/yotta/xenon/borg/opt/::2025-07-27 --json and register info
|
||||
|
||||
|
||||
@asset(
    partitions_def=borg_repo_partitions_def,
)
def borg_repo(context: dg.AssetExecutionContext) -> None:
    """List one borg repository and log a `borg_archive` materialization per archive.

    The repository is the run's partition key, located under
    ``ssh://{BORG_HOST}{BORG_ROOT}``. Each entry of the ``archives`` list in
    the ``borg list --json`` output is logged against ``borg_archive.key``
    with the archive id as metadata.

    Raises:
        subprocess.CalledProcessError: if ``borg list`` exits non-zero.
    """
    repo = context.partition_key
    location = f"ssh://{BORG_HOST}{BORG_ROOT}{repo}"
    # structlog instead of icecream's `ic`, which is only a builtin after
    # definitions.py has called `install()`.
    logger.debug("Listing borg repository", location=location)

    # Get Borg backup list
    try:
        result = subprocess.run(
            ["borg", "list", "--json", location],
            capture_output=True,
            text=True,
            check=True,
            # Extend the inherited environment instead of replacing it, so
            # PATH, HOME, TZ etc. stay available to borg and ssh.
            env={**os.environ, "BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK": "yes"},
        )
    except subprocess.CalledProcessError as e:
        logger.error("borg list failed", exc_info=e, code=e.returncode)
        sys.stderr.write("borg list failed\n" + (e.stderr or ""))
        # Re-raise: continuing would hit an unbound `result` and hide the
        # actual borg failure behind an UnboundLocalError.
        raise

    data = json.loads(result.stdout)

    for entry in data.get("archives", []):
        # The archive name is used as the "date" dimension key.
        # NOTE(review): assumes archive names are dates matching
        # daily_partitions_def — confirm.
        partition = dg.MultiPartitionKey({"date": entry["archive"], "repo": repo})
        context.log_event(
            dg.AssetMaterialization(
                asset_key=borg_archive.key,
                partition=partition,
                metadata={
                    "id": dg.MetadataValue.text(entry["id"]),
                },
            )
        )
|
||||
# context.
|
||||
|
||||
# snapshots = data.get("archives", [])
|
||||
#
|
||||
# # Find latest backup for this day
|
||||
# match = next(
|
||||
# (s for s in reversed(snapshots)
|
||||
# if datetime.fromisoformat(s["end"]).date() == expected_date),
|
||||
# None
|
||||
# )
|
||||
#
|
||||
# if match:
|
||||
# context.log_event(
|
||||
# dg.AssetMaterialization(
|
||||
# asset_key=one.key, partition="2025-07-27"
|
||||
# ) # this works!
|
||||
# )
|
||||
#
|
||||
# return {
|
||||
# "name": match["name"],
|
||||
# "end": match["end"],
|
||||
# "size": match.get("size", 0)
|
||||
# }
|
||||
# else:
|
||||
# raise Exception(f"No backup found for {expected_date}")
|
||||
7
apps/backup/src/config.py
Normal file
7
apps/backup/src/config.py
Normal file
@@ -0,0 +1,7 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Application name: the APP environment variable when set, otherwise the name
# of the directory that contains this file's parent directory.
APP = os.getenv("APP", Path(__file__).parent.parent.name)

# Host part and root path used to build `ssh://{BORG_HOST}{BORG_ROOT}...`
# repository locations.
BORG_HOST = "backup"
BORG_ROOT: str = "/mnt/yotta/xenon/borg/"
|
||||
22
apps/backup/src/definitions.py
Normal file
22
apps/backup/src/definitions.py
Normal file
@@ -0,0 +1,22 @@
|
||||
import assets
|
||||
import sensors
|
||||
from config import APP
|
||||
from icecream import install
|
||||
|
||||
import dagster as dg
|
||||
|
||||
# icecream's install(); presumably what lets the bare `ic(...)` calls in
# assets.py resolve without an import — confirm.
install()

# Every asset loaded from the `assets` module is placed in the APP group and
# tagged with {"app": APP}; the only sensor registered is `borg_repos`.
definitions = dg.Definitions(
    assets=[
        asset_def.with_attributes(
            group_names_by_key={asset_def.key: APP},
            tags_by_key={asset_def.key: {"app": APP}},
        )
        for asset_def in dg.load_assets_from_modules([assets])
    ],
    resources={},
    jobs=[],
    schedules=[],
    sensors=[sensors.borg_repos],
)
|
||||
0
apps/backup/src/jobs.py
Normal file
0
apps/backup/src/jobs.py
Normal file
8
apps/backup/src/partitions.py
Normal file
8
apps/backup/src/partitions.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import os
|
||||
|
||||
import dagster as dg
|
||||
|
||||
# Dynamic partition set named "borg_repo".
borg_repo_partitions_def = dg.DynamicPartitionsDefinition(name="borg_repo")

# Daily partitions starting 2025-01-01, in the zone named by $TZ (UTC if unset).
daily_partitions_def = dg.DailyPartitionsDefinition(
    start_date="2025-01-01",
    end_offset=1,
    timezone=os.getenv("TZ", "UTC"),
)
|
||||
26
apps/backup/src/sensors.py
Normal file
26
apps/backup/src/sensors.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import structlog
|
||||
from partitions import borg_repo_partitions_def
|
||||
from utils.borg import get_ssh_client, list_repos
|
||||
|
||||
import dagster as dg
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
@dg.sensor()
def borg_repos(context: dg.SensorEvaluationContext) -> dg.SensorResult:
    """Register borg repositories found on the backup host as dynamic partitions.

    Compares the partitions already stored for ``borg_repo_partitions_def``
    with the repositories ``list_repos`` finds under ``BORG_ROOT`` and
    requests the missing ones to be added.

    Returns:
        A ``SensorResult`` with one add-partitions request for the new
        repositories, or a skip reason when there is nothing to add.
    """
    # Function-scope import so the path comes from the same constant the
    # assets use, instead of a second hard-coded copy.
    from config import BORG_ROOT

    existing_repos = set(
        context.instance.get_dynamic_partitions(borg_repo_partitions_def.name)
    )

    with get_ssh_client() as client:
        repos = set(list_repos(client, BORG_ROOT))

    # Sorted so the request content is deterministic between evaluations.
    new_repos = sorted(repos - existing_repos)
    if not new_repos:
        # Avoid emitting an empty add request on every tick.
        return dg.SensorResult(skip_reason="No new borg repositories found")

    return dg.SensorResult(
        dynamic_partitions_requests=[
            borg_repo_partitions_def.build_add_request(new_repos),
        ],
    )
|
||||
11
apps/backup/src/test.py
Normal file
11
apps/backup/src/test.py
Normal file
@@ -0,0 +1,11 @@
|
||||
import structlog
|
||||
from utils.borg import get_ssh_client, list_repos
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: connect over SSH and print the set of names yielded by
    # list_repos for the parent directory.
    parent = "/mnt/yotta/xenon/borg/"
    with get_ssh_client() as client:
        repos = {*list_repos(client, parent)}

    print(repos)
|
||||
0
apps/backup/src/utils/__init__.py
Normal file
0
apps/backup/src/utils/__init__.py
Normal file
59
apps/backup/src/utils/borg.py
Normal file
59
apps/backup/src/utils/borg.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from collections.abc import Iterator
|
||||
from configparser import ConfigParser
|
||||
from contextlib import contextmanager
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
|
||||
import paramiko
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
@contextmanager
def get_ssh_client(host_alias: str = "backup") -> Iterator[paramiko.SSHClient]:
    """Yield a connected paramiko SSH client configured from ``~/.ssh/config``.

    Args:
        host_alias: ``Host`` entry to look up in the SSH config file.

    Yields:
        The connected ``paramiko.SSHClient``. It is closed when the ``with``
        block exits, including when the block or the connection attempt raises.
    """
    ssh_config = paramiko.SSHConfig()
    with open(Path.home() / ".ssh/config") as f:
        ssh_config.parse(f)

    host_config = ssh_config.lookup(host_alias)
    # `identityfile` is read as a list; only its first entry is used.
    identity_files = host_config.get("identityfile") or [None]

    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification. Kept as in the original; confirm this is intended.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(
            hostname=host_config.get("hostname", "localhost"),
            port=int(host_config.get("port", 22)),
            username=host_config.get("user"),
            key_filename=identity_files[0],
        )
        yield client
    finally:
        # Runs on normal exit and on errors, so the client never leaks.
        client.close()
|
||||
|
||||
|
||||
def list_repos(client, parent) -> Iterator[str]:
    """Yield the names of directories under `parent` that hold a borg repo config.

    Runs ``ls {parent}*/config`` on the remote host, reads each listed file
    over SFTP and parses it as an INI file. A directory counts as a repository
    when the file has a ``version`` option in its ``repository`` section.

    Args:
        client: connected SSH client providing ``exec_command`` and ``open_sftp``.
        parent: remote directory prefix; used verbatim in front of ``*/config``.

    Yields:
        The directory name of each config file that passes the check. Files
        that cannot be read or parsed are logged and skipped.
    """
    command = f"ls {parent}*/config"
    _stdin, stdout, _stderr = client.exec_command(command)
    paths = [line.strip() for line in stdout.readlines() if line.strip()]

    sftp = client.open_sftp()
    try:
        for path in paths:
            name = Path(path).parent.name
            logger.info("Opening path", name=name)
            try:
                # Opening is inside the try so one unreadable file is skipped
                # instead of aborting the whole listing.
                with sftp.open(path, "r") as f:
                    content = f.read().decode()
                config = ConfigParser()
                config.read_file(StringIO(content))
                # Raises when the section or option is missing.
                config.get("repository", "version")
            except Exception as e:
                # Deliberate best-effort: anything unreadable is "not a repo".
                logger.warning("Not a borg repository!", name=name, e=e)
                continue
            # Yield outside the try and after the remote file is closed, so
            # errors raised by the consumer are not swallowed as parse failures.
            yield name
    finally:
        # Also runs when the generator is closed early by the caller.
        sftp.close()
|
||||
4
apps/backup/ssh_config
Normal file
4
apps/backup/ssh_config
Normal file
@@ -0,0 +1,4 @@
|
||||
Host backup
|
||||
HostName rik.veenboer.xyz
|
||||
User backup
|
||||
StrictHostKeyChecking no
|
||||
@@ -1,368 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster --extra=other
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
arro3-core==0.5.1
|
||||
# via deltalake
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-delta
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-delta @ git+https://github.com/ASML-Labs/dagster-delta.git@d28de7a7c13b7071f42231234eb9231269c7c1bf#subdirectory=libraries/dagster-delta
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
deltalake==1.1.3
|
||||
# via dagster-delta
|
||||
deprecated==1.2.18
|
||||
# via deltalake
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via dagster
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# yarl
|
||||
jinja2==3.1.6
|
||||
# via dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pendulum==3.1.0
|
||||
# via dagster-delta
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
# pendulum
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# uvicorn
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-toolbelt
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via dagster
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# arro3-core
|
||||
# beautifulsoup4
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via
|
||||
# pandas
|
||||
# pendulum
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
wrapt==1.17.2
|
||||
# via deprecated
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -1,356 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster --extra=stocks
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via dagster
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# yarl
|
||||
jinja2==3.1.6
|
||||
# via dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pendulum==3.1.0
|
||||
# via dev (pyproject.toml)
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
# pendulum
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# uvicorn
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-toolbelt
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via dagster
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via
|
||||
# pandas
|
||||
# pendulum
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -8,31 +8,33 @@ async def scrape(url: str) -> str:
|
||||
|
||||
await page.goto(url, timeout=60000)
|
||||
|
||||
# Wait until at least one toggle button is present
|
||||
await page.wait_for_selector(".toggle-btn", timeout=20000)
|
||||
# Wait until buttons are available
|
||||
await page.wait_for_selector('div[role="button"][aria-expanded]', timeout=20000)
|
||||
|
||||
# Set zoom
|
||||
# Zoom out for full view
|
||||
await page.evaluate("document.body.style.zoom='50%'")
|
||||
|
||||
# Find all toggle buttons
|
||||
toggle_buttons = await page.query_selector_all(".toggle-btn")
|
||||
print(f"Found {len(toggle_buttons)} toggle buttons")
|
||||
# Find collapsible buttons
|
||||
toggle_buttons = await page.query_selector_all(
|
||||
'div[role="button"][aria-expanded]'
|
||||
)
|
||||
print(f"Found {len(toggle_buttons)} expandable buttons")
|
||||
|
||||
for i, btn in enumerate(toggle_buttons):
|
||||
try:
|
||||
# Ensure it's visible and enabled
|
||||
if await btn.is_visible() and await btn.is_enabled():
|
||||
await btn.click()
|
||||
await page.wait_for_timeout(1000)
|
||||
aria_expanded = await btn.get_attribute("aria-expanded")
|
||||
if aria_expanded == "false":
|
||||
if await btn.is_visible() and await btn.is_enabled():
|
||||
await btn.click()
|
||||
await page.wait_for_timeout(1000)
|
||||
|
||||
if i == len(toggle_buttons) - 1:
|
||||
break
|
||||
|
||||
# Scroll down gradually
|
||||
# Scroll gradually
|
||||
scroll_step = 500
|
||||
total_height = await page.evaluate("() => document.body.scrollHeight")
|
||||
current_position = 0
|
||||
|
||||
while current_position < total_height:
|
||||
await page.evaluate(f"window.scrollTo(0, {current_position});")
|
||||
await page.wait_for_timeout(100)
|
||||
@@ -44,17 +46,14 @@ async def scrape(url: str) -> str:
|
||||
except Exception as e:
|
||||
print(f"Skipped button due to error: {e}")
|
||||
|
||||
# Get the page content
|
||||
# Capture expanded HTML
|
||||
page_source = await page.content()
|
||||
|
||||
# Close the browser
|
||||
await browser.close()
|
||||
|
||||
# Continue scraping logic here...
|
||||
print("Scraping done")
|
||||
|
||||
# Save the page content to a file
|
||||
# Save to file
|
||||
with open("/cache/scraped_page.html", "w") as fp:
|
||||
fp.write(page_source)
|
||||
|
||||
print("Scraping done")
|
||||
|
||||
return page_source
|
||||
|
||||
@@ -1,351 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster --extra=tesla
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via dagster
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# yarl
|
||||
jinja2==3.1.6
|
||||
# via dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# uvicorn
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-toolbelt
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via dagster
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -1,353 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster --extra=vinyl
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via dagster
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# yarl
|
||||
jinja2==3.1.6
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# uvicorn
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-toolbelt
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via dagster
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -12,17 +12,20 @@ from dagster_polars.patito import patito_model_to_dagster_type
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from models import Deal
|
||||
from partitions import daily_partitions_def, multi_partitions_def
|
||||
from plato.fetch import scrape_plato
|
||||
from platenzaak.parse import parse as parse_platenzaak
|
||||
from platenzaak.scrape import scrape as scrape_platenzaak
|
||||
from plato.parse import parse as parse_plato
|
||||
from plato.scrape import scrape as scrape_plato
|
||||
from shared.utils import get_partition_keys, parse_partition_keys
|
||||
from sounds.fetch import fetch_deals
|
||||
from sounds.parse import parse as parse_sounds
|
||||
from sounds.scrape import scrape as scrape_sounds
|
||||
from structlog.stdlib import BoundLogger
|
||||
from utils.email import EmailService
|
||||
|
||||
import dagster as dg
|
||||
|
||||
asset = partial(dg.asset, key_prefix=APP)
|
||||
logger = structlog.get_logger()
|
||||
logger: BoundLogger = structlog.get_logger()
|
||||
|
||||
|
||||
@asset(
|
||||
@@ -63,22 +66,24 @@ def deals(context: dg.AssetExecutionContext) -> pl.DataFrame:
|
||||
logger.error("Failed to load CSV file!", error=e)
|
||||
raise dg.Failure(f"Cannot materialize for the past: {date.date()}")
|
||||
|
||||
if source == "plato":
|
||||
logger.info("Scraping Plato")
|
||||
df = scrape_plato()
|
||||
logger.info("Scraped Plato", rows=len(df), head=df.head().to_markdown())
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
if source == "sounds":
|
||||
logger.info("Scraping Sounds")
|
||||
df = fetch_deals()
|
||||
ic(df.columns)
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
match source:
|
||||
case "plato":
|
||||
logger.info("Scraping Plato")
|
||||
df = scrape_plato()
|
||||
logger.info("Scraped Plato", rows=len(df), head=df.head().to_markdown())
|
||||
case "sounds":
|
||||
logger.info("Scraping Sounds")
|
||||
df = scrape_sounds()
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
case "platenzaak":
|
||||
logger.info("Scraping Platenzaak")
|
||||
df = scrape_platenzaak(logger=logger)
|
||||
logger.info("Scraped Sounds", rows=len(df), head=df.head().to_markdown())
|
||||
case _:
|
||||
raise ValueError(f"Unknown source: {source}!")
|
||||
|
||||
return pl.DataFrame(
|
||||
[{"date": context.partition_key, "data": f"Data for {context.partition_key}"}]
|
||||
)
|
||||
ic(df.columns)
|
||||
return pl.from_pandas(df.assign(**partition_key))
|
||||
|
||||
|
||||
@asset(
|
||||
@@ -105,9 +110,10 @@ def cleaned_deals(
|
||||
parsed_df = parse_plato(df)
|
||||
case "sounds":
|
||||
parsed_df = parse_sounds(df)
|
||||
case "platenzaak":
|
||||
parsed_df = parse_platenzaak(df)
|
||||
case _:
|
||||
context.log.warning(f"Unknown source: {source}!")
|
||||
return
|
||||
raise ValueError(f"Unknown source: {source}!")
|
||||
|
||||
ic(parsed_df.collect_schema())
|
||||
|
||||
@@ -155,7 +161,7 @@ def works(context: dg.AssetExecutionContext) -> Iterator[dg.Output[pl.DataFrame]
|
||||
"date": dg.DimensionPartitionMapping(
|
||||
dimension_name="date",
|
||||
partition_mapping=dg.TimeWindowPartitionMapping(
|
||||
start_offset=-10,
|
||||
start_offset=-3,
|
||||
end_offset=0,
|
||||
allow_nonexistent_upstream_partitions=True,
|
||||
),
|
||||
@@ -170,9 +176,7 @@ def works(context: dg.AssetExecutionContext) -> Iterator[dg.Output[pl.DataFrame]
|
||||
},
|
||||
output_required=False,
|
||||
dagster_type=patito_model_to_dagster_type(Deal),
|
||||
automation_condition=dg.AutomationCondition.on_missing().without(
|
||||
dg.AutomationCondition.in_latest_time_window()
|
||||
),
|
||||
automation_condition=dg.AutomationCondition.eager(),
|
||||
)
|
||||
def new_deals(
|
||||
context: dg.AssetExecutionContext, partitions: dict[str, pl.LazyFrame | None]
|
||||
@@ -192,7 +196,6 @@ def new_deals(
|
||||
|
||||
if len(partition_keys := sorted(partitions.keys())) < 2:
|
||||
context.log.warning("Not enough partitions to fetch new deals!")
|
||||
|
||||
return
|
||||
|
||||
before, after = partition_keys[-2:]
|
||||
@@ -214,7 +217,9 @@ def new_deals(
|
||||
new_df = df_after.join(df_before.select("id"), on="id", how="anti").collect()
|
||||
if new_df.height:
|
||||
context.log.info(f"New deals found ({new_df.height}x)!")
|
||||
yield dg.Output(Deal.DataFrame(new_df))
|
||||
yield dg.Output(
|
||||
Deal.DataFrame(new_df.with_columns(pl.col("release").cast(pl.Date)))
|
||||
)
|
||||
else:
|
||||
context.log.info("No new deals found!")
|
||||
|
||||
@@ -227,7 +232,9 @@ def new_deals(
|
||||
},
|
||||
ins={"partitions": dg.AssetIn(key=new_deals.key)},
|
||||
output_required=False,
|
||||
automation_condition=dg.AutomationCondition.eager(),
|
||||
automation_condition=dg.AutomationCondition.eager().without(
|
||||
~dg.AutomationCondition.any_deps_missing()
|
||||
),
|
||||
)
|
||||
def good_deals(
|
||||
context: dg.AssetExecutionContext,
|
||||
@@ -237,6 +244,9 @@ def good_deals(
|
||||
parsed_partition_keys = parse_partition_keys(context, "partitions")
|
||||
ic(parsed_partition_keys)
|
||||
|
||||
if not partitions:
|
||||
logger.warning("Partitions are empty!")
|
||||
return
|
||||
df = pl.concat(partitions.values(), how="vertical_relaxed").collect()
|
||||
|
||||
counts = dict(df.group_by("source").len().iter_rows())
|
||||
@@ -259,7 +269,7 @@ def good_deals(
|
||||
]
|
||||
|
||||
# Render HTML from Jinja template
|
||||
env = Environment(loader=FileSystemLoader(f"/apps/{APP}"))
|
||||
env = Environment(loader=FileSystemLoader(f"/code/apps/{APP}"))
|
||||
template = env.get_template("email.html")
|
||||
html_content = template.render(deals=deals)
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@ define_asset_job = partial(dg.define_asset_job, **kwargs)
|
||||
|
||||
deals_job = dg.define_asset_job(
|
||||
"deals_job",
|
||||
selection=[assets.deals.key],
|
||||
partitions_def=assets.deals.partitions_def,
|
||||
selection=dg.AssetSelection.assets(assets.new_deals.key).upstream(),
|
||||
partitions_def=assets.new_deals.partitions_def,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -10,5 +10,7 @@ class Deal(pt.Model):
|
||||
title: str = pt.Field(description="Title of the deal.")
|
||||
url: str = pt.Field(description="URL to the deal.")
|
||||
date: datetime.date = pt.Field(description="Day the deal was listed.")
|
||||
release: datetime.date = pt.Field(description="Release date.")
|
||||
release: datetime.date | None = pt.Field(
|
||||
description="Release date.", allow_missing=True
|
||||
)
|
||||
price: float = pt.Field(description="Price of the deal in EUR.")
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
|
||||
import dagster as dg
|
||||
|
||||
SOURCES = ["plato", "sounds"]
|
||||
SOURCES = ["plato", "sounds", "platenzaak"]
|
||||
daily_partitions_def = dg.DailyPartitionsDefinition(
|
||||
start_date="2024-09-01", end_offset=1, timezone=os.environ.get("TZ", "UTC")
|
||||
)
|
||||
|
||||
0
apps/vinyl/src/platenzaak/__init__.py
Normal file
0
apps/vinyl/src/platenzaak/__init__.py
Normal file
13
apps/vinyl/src/platenzaak/parse.py
Normal file
13
apps/vinyl/src/platenzaak/parse.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import polars as pl
|
||||
|
||||
|
||||
def parse(df: pl.LazyFrame) -> pl.LazyFrame:
    """Parse the Platenzaak DataFrame.

    Adds (or overwrites) the normalised deal columns on top of the scraped
    ones:

    * ``date``    -- cast to ``pl.Date``
    * ``artist``  -- stripped and lower-cased
    * ``title``   -- stripped and lower-cased ``album``
    * ``release`` -- always null, typed as ``pl.Date``
    * ``price``   -- ``current_price`` cast to ``pl.Float64``
    * ``url``     -- absolute product URL derived from the scraped link
    """
    # NOTE(review): the scraped ``url`` column comes from ``data-wlh-link``;
    # presumably it is either a site-relative path ("/products/...") or an
    # absolute URL -- both are handled below. Confirm against a live page.
    # The cast keeps ``.str`` usable even if the column arrived all-null.
    link = pl.col("url").cast(pl.String)

    return df.with_columns(
        date=pl.col("date").cast(pl.Date),
        artist=pl.col("artist").str.strip_chars().str.to_lowercase(),
        title=pl.col("album").str.strip_chars().str.to_lowercase(),
        # Typed null: a bare ``pl.lit(None)`` produces a Null-dtype column
        # that has to be cast to Date again before it fits the deal schema.
        release=pl.lit(None, dtype=pl.Date),
        price=pl.col("current_price").cast(pl.Float64),
        # Previously the product *id* was appended to the domain without a
        # separator, which does not produce a valid link.
        url=pl.when(link.str.starts_with("/"))
        .then(pl.format("https://platenzaak.nl{}", link))
        .otherwise(link),
    )
|
||||
90
apps/vinyl/src/platenzaak/scrape.py
Normal file
90
apps/vinyl/src/platenzaak/scrape.py
Normal file
@@ -0,0 +1,90 @@
|
||||
from collections.abc import Iterator
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from structlog.stdlib import BoundLogger
|
||||
|
||||
|
||||
def parse_price(price_block):
    """Convert a price element into a float.

    A block like::

        <span class="amount theme-money">€ 30<sup>99</sup></span>

    becomes ``30.99``. The euros are taken from the element's own (direct)
    text and the cents from a nested ``<sup>``; without a ``<sup>`` the
    cents default to ``00`` unless the text already carries decimals
    (``"€ 30,99"``).

    Args:
        price_block: Parsed element exposing ``find`` (BeautifulSoup-style),
            or ``None``.

    Returns:
        The price as a float, or ``None`` when the block is missing, has no
        direct text, or the text is not numeric.
    """
    if not price_block:
        return None

    # Extract the main number (the element's own text, before <sup>).
    main = price_block.find(string=True, recursive=False)
    if main is None:
        # No direct text node (e.g. the amount is wrapped in another tag).
        return None
    euros = main.strip().replace("€", "").replace(",", ".").strip()

    # Extract the <sup> part (cents).
    sup = price_block.find("sup")
    if sup is not None:
        candidate = f"{euros}.{sup.get_text(strip=True)}"
    elif "." in euros:
        # Decimals are already part of the text; do not append ".00" to them.
        candidate = euros
    else:
        candidate = f"{euros}.00"

    try:
        return float(candidate)
    except ValueError:
        return None
|
||||
|
||||
|
||||
def parse_page(html) -> Iterator[dict]:
    """Yield one dict per product tile found in a listing page.

    Tiles (``div.product-block__inner``) without a ``[data-wlh-id]`` element
    are skipped.

    Args:
        html: HTML text of the page.

    Yields:
        Dicts with the wishlist metadata (``id``, ``variant_id``, ``name``,
        ``price``, ``url``, ``image``) plus ``artist``, ``album``,
        ``current_price``, ``original_price`` and ``on_sale``. Every dict has
        the same keys; values that cannot be found are ``None``.
    """
    soup = BeautifulSoup(html, "html.parser")

    for block in soup.select("div.product-block__inner"):
        # Wishlist button holds most metadata
        wishlist = block.select_one("[data-wlh-id]")
        if not wishlist:
            continue

        product = {
            "id": wishlist.get("data-wlh-id"),
            "variant_id": wishlist.get("data-wlh-variantid"),
            "name": wishlist.get("data-wlh-name"),
            "price": wishlist.get("data-wlh-price"),
            "url": wishlist.get("data-wlh-link"),
            "image": wishlist.get("data-wlh-image"),
            # Pre-seeded so the resulting table always has these columns,
            # even when no tile on the page exposes a title block.
            "artist": None,
            "album": None,
        }

        # Artist + Title (in the title link)
        title_block = block.select_one(".product-block__title-price .title")
        if title_block:
            artist = title_block.find("span")
            if artist:
                product["artist"] = artist.get_text(strip=True)
            # The text after <br> is the album title
            product["album"] = (
                title_block.get_text(separator="|").split("|")[-1].strip()
            )

        # Current price (might include discounts)
        product["current_price"] = parse_price(block.select_one(".price .amount"))

        # Original price if on sale
        product["original_price"] = parse_price(
            block.select_one(".price del .theme-money")
        )

        # Sale label
        product["on_sale"] = bool(block.select_one(".product-label--sale"))

        yield product
|
||||
|
||||
|
||||
def scrape(logger: BoundLogger) -> pd.DataFrame:
    """Scrape all vinyl sale products, page by page.

    Pages are requested with an increasing ``page`` query parameter until a
    page yields no products.

    Args:
        logger: Logger used to report the number of products per page.

    Returns:
        DataFrame with one row per product dict produced by ``parse_page``.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.Timeout: If a page does not answer within the timeout.
    """
    page = 1
    products = []
    while True:
        response = requests.get(
            f"https://www.platenzaak.nl/collections/sale?filter.p.m.custom.config_group=Vinyl&page={page}",
            # Without a timeout a stalled connection would block the run forever.
            timeout=30,
        )
        response.raise_for_status()
        page_products = list(parse_page(response.text))
        logger.info("Scraped page", page=page, products=len(page_products))
        if not page_products:
            break
        products.extend(page_products)
        page += 1
    return pd.DataFrame(products)
|
||||
@@ -1,154 +0,0 @@
|
||||
import os
|
||||
|
||||
import boto3
|
||||
import pandas as pd
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
from dotenv import load_dotenv
|
||||
from fetch import scrape_plato
|
||||
from utils import get
|
||||
|
||||
|
||||
def update_database(articles_df=None, database_file="/home/user/plato.parquet"):
    """Merge freshly scraped articles into the parquet database.

    Args:
        articles_df: Scraped articles, or ``None`` to only load the database.
        database_file: Path of the parquet file holding the database.

    Returns:
        Tuple ``(database_df, new_df)``: the (updated) database and the rows
        of ``articles_df`` whose ``(ean, _price)`` pair was not yet stored.
        Without ``articles_df`` the second item is an empty frame, or both
        items are ``None`` when no database file exists.
    """
    stored_df = pd.read_parquet(database_file) if os.path.exists(database_file) else None

    # Nothing scraped: report the database as-is with no new rows.
    if articles_df is None:
        fresh_df = stored_df.head(0) if stored_df is not None else None
        return stored_df, fresh_df

    # First run: everything scraped is new and becomes the database.
    if stored_df is None:
        articles_df.to_parquet(database_file)
        return articles_df, articles_df

    key_columns = ["ean", "_price"]
    comparison = pd.merge(
        stored_df[key_columns], articles_df[key_columns], how="right", indicator=True
    )
    unseen_keys = comparison[comparison["_merge"] == "right_only"].drop(columns="_merge")
    fresh_df = unseen_keys.merge(articles_df)

    # Keep one row per ean, taking the last values in ``_date`` order.
    chronological = pd.concat([stored_df, fresh_df]).sort_values("_date")
    stored_df = chronological.groupby("ean").last().reset_index()
    stored_df.to_parquet(database_file)

    return stored_df, fresh_df
|
||||
|
||||
|
||||
def send_email(lines):
    """Send an HTML e-mail through Amazon SES with the given lines as body.

    Args:
        lines: Iterable of HTML fragments (strings); they are joined with
            newlines and placed inside the ``<body>`` of the message.

    Sending is best-effort: failures are not raised but reported on stdout,
    as is the SES message id on success.
    """
    # Define the email parameters
    SENDER = "mail@veenboer.xyz"
    RECIPIENT = "rik.veenboer@gmail.com"
    SUBJECT = "Aanbieding op plato!"

    # The email body for recipients with non-HTML email clients
    BODY_TEXT = ""

    # The HTML body of the email
    tmp = "\n".join(lines)
    BODY_HTML = f"""<html>
<head></head>
<body>
{tmp}
</body>
</html>
"""

    # The character encoding for the email
    CHARSET = "UTF-8"

    # Try to send the email
    try:
        client = boto3.client(
            "ses", region_name="eu-west-1"
        )  # Change the region as needed

        # Provide the contents of the email
        response = client.send_email(
            Destination={
                "ToAddresses": [
                    RECIPIENT,
                ],
            },
            Message={
                "Body": {
                    "Html": {
                        "Charset": CHARSET,
                        "Data": BODY_HTML,
                    },
                    "Text": {
                        "Charset": CHARSET,
                        "Data": BODY_TEXT,
                    },
                },
                "Subject": {
                    "Charset": CHARSET,
                    "Data": SUBJECT,
                },
            },
            Source=SENDER,
        )
    # Display an error if something goes wrong.
    except NoCredentialsError:
        print("Credentials not available")
    except PartialCredentialsError:
        print("Incomplete credentials provided")
    except Exception as e:
        # Deliberate catch-all: a failed notification must not abort the run.
        print(f"Error: {e}")
    else:
        print("Email sent! Message ID:")
        print(response["MessageId"])
|
||||
|
||||
|
||||
def main(dry=False):
    """Scrape Plato deals, update the database and mail interesting new ones.

    Args:
        dry: When true, skip scraping and use a random sample of at most 20
            rows from the existing database as the "new" deals instead.

    Raises:
        AssertionError: If the request made through the default ``get``
            reports the same IP as the one made with ``False`` as second
            argument.
    """
    from html import escape  # local import keeps this block self-contained

    load_dotenv("/opt/.env")

    # NOTE(review): presumably the second argument of ``get`` switches a
    # proxy/VPN off, so the two answers must differ -- confirm in ``utils``.
    local_ip = get("http://ifconfig.me", False).text
    get_ip = get("http://ifconfig.me").text
    print(f"Local IP = {local_ip}")
    print(f"Request IP = {get_ip}")
    if local_ip == get_ip:
        # Explicit raise instead of ``assert`` so the guard survives ``python -O``.
        raise AssertionError("Request IP equals local IP")

    # Context manager so the file handle is closed deterministically.
    with open("/home/user/artists.txt") as fp:
        artists = fp.read().strip().splitlines()
    print(f"Number of known artists = {len(artists)}")

    articles_df = None if dry else scrape_plato(get=get)
    database_df, new_df = update_database(articles_df)

    if dry:
        if database_df is None:
            # No database file yet: there is nothing to sample from.
            print("No database available for a dry run")
            return
        # ``sample`` raises when asked for more rows than exist.
        new_df = database_df.sample(min(20, len(database_df)))

    print(f"Database size = {len(database_df)}")
    print(f"New = {len(new_df)}")

    # new_df = new_df[new_df['_artist'].isin(artists)].query('_price <= 25')
    new_df = new_df.query('_price <= 25 and ean != ""')
    print(f"Interesting = {len(new_df)}")

    if not len(new_df):
        return

    message = []
    for _, row in new_df.head(10).iterrows():
        # Scraped values are untrusted text: escape them before embedding in HTML.
        message.extend(
            [
                f'<a href="https://www.platomania.nl{escape(str(row.url))}"><h1>NEW</h1></a>',
                "<ul>",
                f"<li>[artist] {escape(str(row.artist))}</li>",
                f"<li>[title] {escape(str(row.title))}</li>",
                f"<li>[price] {escape(str(row.price))}</li>",
                f"<li>[release] {escape(str(row.release_date))}</li>",
                "</ul>",
            ]
        )
    send_email(message)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run a real (non-dry) scrape-and-notify cycle when executed as a script.
    main(dry=False)
|
||||
@@ -1,52 +0,0 @@
|
||||
#!/root/.pyenv/versions/dev/bin/python
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .scrape import get_soup, scrape_page, scrape_page_links
|
||||
|
||||
|
||||
def scrape_plato(get=None):
    """Scrape every page of the Plato vinyl offers into a DataFrame.

    The first offers page is fetched, the page links found on it are visited
    in ascending page order, and all scraped articles are combined.

    Args:
        get: Passed through unchanged to ``get_soup`` for every page.

    Returns:
        DataFrame restricted to the fixed scraped columns, plus:
        ``_artist`` ("Last, First" flipped, lower-cased, parenthesised
        suffixes removed), ``_price`` (float parsed from the last
        space-separated token of ``price``) and ``_date`` (time of the scrape).
    """

    def page_number(href):
        # Page links end in "...=<number>".
        return int(href.split("=")[-1])

    def normalise_artist(name):
        flipped = " ".join(reversed(name.split(", ")))
        return re.sub(r"\s+\([^)]*\)", "", flipped.lower())

    def price_value(text):
        return float(text.split(" ")[-1])

    ic()
    url = "https://www.platomania.nl/vinyl-aanbiedingen?page=1"

    ic(url)
    first_soup = get_soup(url=url, get=get)
    articles_info = scrape_page(first_soup)
    ic(len(articles_info))

    for link in sorted(set(scrape_page_links(first_soup)), key=page_number):
        ic(link)
        tmp = scrape_page(get_soup(url=link, get=get))
        ic(len(tmp))
        articles_info.extend(tmp)

    wanted_columns = [
        "artist",
        "title",
        "url",
        "label",
        "release_date",
        "origin",
        "item_number",
        "ean",
        "delivery_info",
        "price",
    ]
    articles_df = pd.DataFrame(articles_info).reindex(columns=wanted_columns)
    articles_df["_artist"] = articles_df["artist"].map(normalise_artist)
    articles_df["_price"] = articles_df["price"].map(price_value)
    articles_df["_date"] = datetime.now()

    return articles_df
|
||||
60
apps/vinyl/src/plato/scrape.py
Normal file → Executable file
60
apps/vinyl/src/plato/scrape.py
Normal file → Executable file
@@ -1,21 +1,61 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def scrape(get=None):
    """Scrape every Platomania vinyl-offers page into a single DataFrame.

    Starts at page 1, parses it, then walks the pagination links found on
    that page (deduplicated, ordered by the integer after the last ``=``).

    Args:
        get: Optional callable handed to ``get_soup`` for fetching pages.

    Returns:
        A pandas DataFrame restricted to the known article columns, extended
        with ``_artist`` (cleaned artist name), ``_price`` (float price) and
        ``_date`` (time at which the frame was assembled).
    """
    ic()
    url = "https://www.platomania.nl/vinyl-aanbiedingen?page=1"

    ic(url)
    landing_soup = get_soup(url=url, get=get)
    articles_info = scrape_page(landing_soup)
    ic(len(articles_info))

    page_links = list(set(scrape_page_links(landing_soup)))
    page_links.sort(key=lambda href: int(href.split("=")[-1]))
    for link in page_links:
        ic(link)
        page_soup = get_soup(url=link, get=get)
        tmp = scrape_page(page_soup)
        ic(len(tmp))
        articles_info.extend(tmp)

    def clean(name):
        # Turn "Last, First (note)" into "first last".
        parts = name.split(", ")
        parts.reverse()
        lowered = " ".join(parts).lower()
        return re.sub(r"\s+\([^)]*\)", "", lowered)

    articles_df = pd.DataFrame(articles_info).reindex(
        columns=[
            "artist",
            "title",
            "url",
            "label",
            "release_date",
            "origin",
            "item_number",
            "ean",
            "delivery_info",
            "price",
        ]
    )
    # Derived helper columns: keyword order matches the evaluation order of
    # the original three assignments.
    return articles_df.assign(
        _artist=articles_df["artist"].map(clean),
        _price=articles_df["price"].map(lambda raw: float(raw.split(" ")[-1])),
        _date=datetime.now(),
    )
|
||||
|
||||
|
||||
def get_soup(url, get=None):
    """Fetch ``url`` and return its body parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        get: Optional callable taking the URL and returning a response object
            exposing ``status_code`` and ``content``. Defaults to
            ``requests.get``.

    Returns:
        A ``BeautifulSoup`` document built from the response content using
        the ``html.parser`` backend.

    Raises:
        ValueError: If the response status code is anything other than 200.
    """
    if get is None:
        get = requests.get
    response = get(url)

    # Guard clause: anything but a plain 200 is treated as a failed download.
    # (The former trailing raise_for_status()/return pair could never run
    # because both branches of the status check already left the function.)
    if response.status_code != 200:
        raise ValueError(
            f"Failed to retrieve the page. Status code: {response.status_code}"
        )

    return BeautifulSoup(response.content, "html.parser")
|
||||
|
||||
|
||||
def scrape_page_links(soup):
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import glob
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def get_csvs(directory, n):
    """Return the paths of the ``n`` most recent ``*_sounds.csv`` files.

    Recency is decided by the timestamp embedded in the file name
    (``<YYYY-MM-DD_HH:MM:SS>_sounds.csv``). Files whose name prefix cannot
    be parsed in that format are ignored.

    Args:
        directory: Directory to search (its direct children only).
        n: Maximum number of paths to return.

    Returns:
        A list of at most ``n`` file paths, most recent first.
    """
    suffix = "_sounds.csv"
    # Escape the directory part so glob metacharacters in its name
    # ("[", "*", "?") are matched literally rather than as a pattern.
    pattern = os.path.join(glob.escape(os.fspath(directory)), f"*{suffix}")

    def extract_date_from_filename(filename):
        # Everything before the suffix is expected to be the timestamp.
        date_str = os.path.basename(filename).split(suffix)[0]
        try:
            return datetime.strptime(date_str, "%Y-%m-%d_%H:%M:%S")
        except ValueError:
            # The date string cannot be parsed
            return None

    # Pair each file with its parsed date, dropping files with unparsable names.
    dated = [(extract_date_from_filename(path), path) for path in glob.glob(pattern)]
    dated = [item for item in dated if item[0] is not None]

    # Most recent first.
    dated.sort(key=lambda item: item[0], reverse=True)

    return [path for _, path in dated[:n]]
|
||||
|
||||
|
||||
def analyze(df1, df2):
    """Compare two price snapshots and report discounts and new items.

    Both frames are de-duplicated on ``id``. Every row of ``df2`` is kept and
    matched against the ``price`` recorded for the same ``id`` in ``df1``.

    Args:
        df1: Earlier snapshot; must provide ``id`` and ``price`` columns.
        df2: Later snapshot; must provide ``id``, ``name`` and ``price``.

    Returns:
        A ``(deals, new)`` tuple of DataFrames. ``deals`` holds items whose
        price dropped (columns ``id``, ``name``, ``price``, ``discount``),
        ordered by ``discount`` ascending. ``new`` holds items present only
        in ``df2`` (columns ``id``, ``name``, ``price``), ordered by price.
    """
    earlier_prices = df1.drop_duplicates(subset="id")[["id", "price"]]
    later = df2.drop_duplicates(subset="id")

    # Right join keeps every item of the later snapshot; "_merge" records
    # whether the item also existed in the earlier one.
    merged = earlier_prices.merge(later, on="id", how="right", indicator=True)
    merged = (
        merged.assign(discount=merged["price_y"] - merged["price_x"])
        .drop(columns=["price_x"])
        .rename(columns={"price_y": "price"})
    )

    cheaper = merged[merged["discount"] < 0]
    unseen = merged[merged["_merge"] == "right_only"]

    deals = cheaper.sort_values(by="discount")[["id", "name", "price", "discount"]]
    new = unseen.sort_values(by="price")[["id", "name", "price"]]
    return deals, new
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Compare the most recent snapshot against progressively older ones and
    # stop at the first older snapshot that differs (new items or discounts).
    csvs = get_csvs(".", 100)

    for i in range(1, len(csvs)):
        print(f"Comparing {csvs[i]} with {csvs[0]}")
        df_previous = pd.read_csv(csvs[i], index_col=0)
        df_latest = pd.read_csv(csvs[0], index_col=0)
        deals, new = analyze(df_previous, df_latest)

        has_new = len(new) > 0
        has_deals = len(deals) > 0

        # The new-items section is gated on `new` itself; it was previously
        # gated on `deals`, which hid new items unless a discount also existed.
        if has_new:
            print()
            print("New items:")
            print(new)
            print()

        if has_deals:
            print("Discounted items:")
            print(deals)

        if has_new or has_deals:
            break
|
||||
@@ -3,7 +3,7 @@ from utils.parse import parse_date
|
||||
|
||||
|
||||
def parse(df: pl.LazyFrame) -> pl.LazyFrame:
|
||||
"""Parse the Plato DataFrame."""
|
||||
"""Parse the Sounds DataFrame."""
|
||||
return df.with_columns(
|
||||
date=pl.col("date").cast(pl.Date),
|
||||
artist=pl.coalesce(pl.col("artist"), pl.col("name").str.split("-").list.get(1))
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
@@ -74,11 +71,11 @@ def parse_page(html_content):
|
||||
)
|
||||
|
||||
|
||||
def fetch_deals():
|
||||
def scrape():
|
||||
# Get page count
|
||||
page_count = get_page_count(
|
||||
requests.get("https://www.sounds.nl/uitverkoop/1/lp/all/art").text
|
||||
)
|
||||
response = requests.get("https://www.sounds.nl/uitverkoop/1/lp/all/art")
|
||||
response.raise_for_status()
|
||||
page_count = get_page_count(response.text)
|
||||
time.sleep(1)
|
||||
print(f"Number of pages: {page_count}")
|
||||
|
||||
@@ -86,25 +83,11 @@ def fetch_deals():
|
||||
base_url = "https://www.sounds.nl/uitverkoop/{page_number}/lp/all"
|
||||
dfs = []
|
||||
for i in tqdm(range(page_count)):
|
||||
df = parse_page(requests.get(base_url.format(page_number=i)).text)
|
||||
response = requests.get(base_url.format(page_number=i))
|
||||
response.raise_for_status()
|
||||
df = parse_page(response.text)
|
||||
dfs.append(df)
|
||||
time.sleep(2)
|
||||
|
||||
# Combine dfs
|
||||
return pd.concat(dfs) if dfs else pd.DataFrame(columns=["id", "name", "price"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Scrape the current listing and report how many rows came back.
    df = fetch_deals()
    print(f"Found {len(df)} deals")

    # Preview the ten cheapest entries.
    cheapest = df.sort_values(by="price").head(10)
    print(cheapest)

    # Persist the snapshot under a timestamped file name.
    directory = "/home/bram/src/python"
    prefix = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    filepath = f"{directory}/{prefix}_sounds.csv"
    print(f"Writing data to {filepath}")
    df.to_csv(filepath)
|
||||
@@ -1,391 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster --extra=weather
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
attrs==25.3.0
|
||||
# via
|
||||
# cattrs
|
||||
# requests-cache
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
cattrs==25.1.1
|
||||
# via requests-cache
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via
|
||||
# niquests
|
||||
# requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via dagster
|
||||
flatbuffers==25.2.10
|
||||
# via openmeteo-sdk
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via
|
||||
# urllib3-future
|
||||
# uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# url-normalize
|
||||
# yarl
|
||||
jh2==5.0.9
|
||||
# via urllib3-future
|
||||
jinja2==3.1.6
|
||||
# via dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
niquests==3.14.1
|
||||
# via openmeteo-requests
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openmeteo-requests==1.6.0
|
||||
# via dev (pyproject.toml)
|
||||
openmeteo-sdk==1.20.1
|
||||
# via openmeteo-requests
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
platformdirs==4.3.8
|
||||
# via requests-cache
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# uvicorn
|
||||
qh3==1.5.3
|
||||
# via urllib3-future
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-cache
|
||||
# requests-toolbelt
|
||||
# retry-requests
|
||||
requests-cache==1.2.1
|
||||
# via dev (pyproject.toml)
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
retry-requests==2.0.0
|
||||
# via dev (pyproject.toml)
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via dagster
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# cattrs
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
url-normalize==2.2.1
|
||||
# via requests-cache
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
# requests-cache
|
||||
# retry-requests
|
||||
urllib3-future==2.13.901
|
||||
# via niquests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
wassima==1.2.2
|
||||
# via niquests
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -102,13 +102,15 @@ def raw_weather(context: dg.AssetExecutionContext) -> Any:
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
date_str = now.strftime("%Y-%m-%d")
|
||||
time_str = now.strftime("%H:%M:%S")
|
||||
|
||||
latitude_str, longitude_str = partition_key[:5], partition_key[5:]
|
||||
yield dg.Output(
|
||||
data,
|
||||
metadata={
|
||||
"date": dg.MetadataValue.timestamp(now),
|
||||
"latitude": dg.MetadataValue.float(latitude),
|
||||
"longitude": dg.MetadataValue.float(longitude),
|
||||
"path_suffix": [date_str, time_str],
|
||||
"path": [APP, "raw", date_str, latitude_str, longitude_str, time_str],
|
||||
},
|
||||
)
|
||||
|
||||
@@ -144,6 +146,7 @@ def raw_weather_batch_latitude(context: dg.AssetExecutionContext) -> None:
|
||||
|
||||
fetcher = WeatherFetcher()
|
||||
latitude, longitude = parse_coordinate_str(location)
|
||||
ic(latitude, longitude)
|
||||
data = fetcher.fetch(latitude=latitude, longitude=longitude)
|
||||
|
||||
now = datetime.now(tz=timezone.utc)
|
||||
@@ -176,6 +179,7 @@ def raw_weather_batch_latitude(context: dg.AssetExecutionContext) -> None:
|
||||
io_manager_key="polars_parquet_io_manager",
|
||||
partitions_def=daily_partitions_def,
|
||||
output_required=False,
|
||||
automation_condition=dg.AutomationCondition.eager(),
|
||||
)
|
||||
def parsed_weather(
|
||||
context: dg.AssetExecutionContext,
|
||||
|
||||
@@ -22,9 +22,10 @@ definitions = dg.Definitions(
|
||||
"polars_parquet_io_manager": PolarsParquetIOManager(base_dir=STORAGE_DIR),
|
||||
},
|
||||
sensors=[
|
||||
# sensors.list_locations,
|
||||
sensors.list_locations,
|
||||
sensors.list_latitudes,
|
||||
# sensors.list_longitudes,
|
||||
sensors.list_longitudes,
|
||||
sensors.retrieve_weather,
|
||||
sensors.retrieve_weather,
|
||||
],
|
||||
)
|
||||
|
||||
@@ -39,12 +39,12 @@ def list_locations(context: dg.SensorEvaluationContext) -> dg.SensorResult:
|
||||
if new_locations:
|
||||
context.log.info(f"Discovered {len(new_locations)} new locations.")
|
||||
|
||||
# Limit to 3 new locations
|
||||
selected = new_locations[:3]
|
||||
return dg.SensorResult(
|
||||
run_requests=[], # dg.RunRequest(partition_key=location) for location in locations],
|
||||
run_requests=[
|
||||
dg.RunRequest(partition_key=location) for location in new_locations[:3]
|
||||
],
|
||||
dynamic_partitions_requests=[
|
||||
location_partitions_def.build_add_request(selected),
|
||||
location_partitions_def.build_add_request(new_locations),
|
||||
latitude_partitions_def.build_add_request(new_latitudes),
|
||||
longitude_partitions_def.build_add_request(new_longitudes),
|
||||
],
|
||||
|
||||
@@ -35,8 +35,8 @@ services:
|
||||
- /opt/dagster/apps/:/code/apps/:ro
|
||||
- /opt/dagster/shared/:/code/shared/:ro
|
||||
- /opt/dagster/logs/:/logs:rw
|
||||
- /opt/dagster/storage/import/:/storage/import/:ro
|
||||
- /opt/dagster/storage/deals/:/storage/deals/:rw
|
||||
# - /mnt/mezzo/scratch/dagster/import/:/storage/import/:ro
|
||||
- /mnt/mezzo/scratch/dagster/deals/:/storage/deals/:rw
|
||||
networks:
|
||||
- dagster
|
||||
|
||||
@@ -53,21 +53,32 @@ services:
|
||||
<<: [ *dagster_env ]
|
||||
DAGSTER_CURRENT_IMAGE: dagster-code-stocks
|
||||
volumes:
|
||||
- /tmp/cache:/cache:rw
|
||||
- /opt/dagster/apps/:/code/apps/:ro
|
||||
- /opt/dagster/shared/:/code/shared/:ro
|
||||
- /opt/dagster/logs/:/logs:rw
|
||||
- /tmp/cache:/cache:rw
|
||||
networks:
|
||||
- dagster
|
||||
|
||||
dagster-code-stocks-playwright:
|
||||
build:
|
||||
context: apps/stocks
|
||||
dockerfile: ../../Dockerfile.code.playwright
|
||||
dockerfile: Dockerfile.code.playwright
|
||||
args:
|
||||
- APP=stocks
|
||||
image: dagster-code-stocks-playwright
|
||||
profiles: [ "never" ]
|
||||
|
||||
dagster-code-backup-base:
|
||||
build:
|
||||
context: apps/backup
|
||||
dockerfile: ../../Dockerfile.code
|
||||
args:
|
||||
- APP=backup
|
||||
image: dagster-code-backup-base
|
||||
profiles: [ "never" ]
|
||||
|
||||
dagster-code-tesla:
|
||||
build:
|
||||
context: apps/tesla
|
||||
@@ -106,6 +117,22 @@ services:
|
||||
networks:
|
||||
- dagster
|
||||
|
||||
dagster-code-backup:
|
||||
build:
|
||||
context: apps/backup
|
||||
container_name: dagster-code-backup
|
||||
image: dagster-code-backup
|
||||
restart: always
|
||||
environment:
|
||||
<<: [ *dagster_env ]
|
||||
DAGSTER_CURRENT_IMAGE: dagster-code-backup
|
||||
volumes:
|
||||
- /opt/dagster/apps/:/code/apps/:ro
|
||||
- /opt/dagster/shared/:/code/shared/:ro
|
||||
- /opt/dagster/logs/:/logs:rw
|
||||
networks:
|
||||
- dagster
|
||||
|
||||
dagster-code-other:
|
||||
build:
|
||||
context: apps/other
|
||||
|
||||
@@ -6,6 +6,7 @@ x-postgres-env: &postgres_env
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
x-system-env: &system_env
|
||||
TZ: Europe/Amsterdam
|
||||
DATA_DIR: ${DATA_DIR}
|
||||
CACHE_DIR: /tmp/cache
|
||||
x-dagster-env: &dagster_env
|
||||
DAGSTER_POSTGRES_HOST: ${POSTGRES_HOST}
|
||||
@@ -26,7 +27,7 @@ x-volumes: &volumes
|
||||
- /opt/dagster/dagster.yaml:/opt/dagster/home/dagster.yaml.template:ro
|
||||
- /opt/dagster/workspace.yaml:/opt/dagster/home/workspace.yaml:ro
|
||||
- /opt/dagster/system/:/code/system/:ro
|
||||
- /opt/dagster/storage/:/storage/:rw
|
||||
- /mnt/mezzo/scratch/dagster/:/storage/:rw
|
||||
- /opt/dagster/logs/:/logs:rw
|
||||
- /var/run/docker.sock:/var/run/docker.sock:rw
|
||||
|
||||
@@ -40,6 +41,8 @@ services:
|
||||
<<: *postgres_env
|
||||
networks:
|
||||
- dagster
|
||||
ports:
|
||||
- '25432:5432'
|
||||
volumes:
|
||||
- /opt/dagster/db/:/var/lib/postgresql/data/
|
||||
|
||||
|
||||
@@ -1,351 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via dagster
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# yarl
|
||||
jinja2==3.1.6
|
||||
# via dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# uvicorn
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-toolbelt
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via dagster
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -15,11 +15,13 @@ run_launcher:
|
||||
class: CustomDockerRunLauncher
|
||||
config:
|
||||
env_vars:
|
||||
- TZ
|
||||
- DAGSTER_POSTGRES_HOST
|
||||
- DAGSTER_POSTGRES_PORT
|
||||
- DAGSTER_POSTGRES_USER
|
||||
- DAGSTER_POSTGRES_PASSWORD
|
||||
- DAGSTER_POSTGRES_DB
|
||||
- DATA_DIR
|
||||
- SMTP_SERVER
|
||||
- SMTP_PORT
|
||||
- SMTP_USERNAME
|
||||
@@ -32,8 +34,8 @@ run_launcher:
|
||||
volumes:
|
||||
- /opt/dagster/apps/:/code/apps/:ro
|
||||
- /opt/dagster/shared/:/code/shared/:ro
|
||||
- /opt/dagster/storage/:/storage/:rw
|
||||
- /opt/dagster/logs/:/logs:rw
|
||||
- ${DATA_DIR}:/storage/:rw
|
||||
- ${CACHE_DIR}:/cache:rw
|
||||
|
||||
run_storage:
|
||||
|
||||
@@ -1,435 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml --extra=dagster --extra=dev
|
||||
alembic==1.16.4
|
||||
# via dagster
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
antlr4-python3-runtime==4.13.2
|
||||
# via dagster
|
||||
anyio==4.10.0
|
||||
# via
|
||||
# gql
|
||||
# starlette
|
||||
# watchfiles
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
attrs==25.3.0
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
backoff==2.2.1
|
||||
# via gql
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
black==25.1.0
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
build==1.3.0
|
||||
# via pip-tools
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
cfgv==3.4.0
|
||||
# via pre-commit
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
click==8.1.8
|
||||
# via
|
||||
# black
|
||||
# dagster
|
||||
# dagster-webserver
|
||||
# pip-tools
|
||||
# uvicorn
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
coloredlogs==14.0
|
||||
# via dagster
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dagit==1.11.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-aws
|
||||
# dagster-docker
|
||||
# dagster-duckdb
|
||||
# dagster-duckdb-pandas
|
||||
# dagster-graphql
|
||||
# dagster-polars
|
||||
# dagster-postgres
|
||||
# dagster-webserver
|
||||
dagster-aws==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-docker==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-duckdb==0.27.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
dagster-duckdb-pandas==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-graphql==1.11.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-webserver
|
||||
dagster-pipes==1.11.4
|
||||
# via dagster
|
||||
dagster-polars==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-postgres==0.27.4
|
||||
# via dev (pyproject.toml)
|
||||
dagster-shared==1.11.4
|
||||
# via dagster
|
||||
dagster-webserver==1.11.4
|
||||
# via dagit
|
||||
distlib==0.4.0
|
||||
# via virtualenv
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
docker==7.1.0
|
||||
# via dagster-docker
|
||||
docker-image-py==0.1.13
|
||||
# via dagster-docker
|
||||
docstring-parser==0.17.0
|
||||
# via dagster
|
||||
duckdb==1.3.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastjsonschema==2.21.1
|
||||
# via nbformat
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
filelock==3.18.0
|
||||
# via
|
||||
# dagster
|
||||
# virtualenv
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
# universal-pathlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
gql==3.5.3
|
||||
# via dagster-graphql
|
||||
graphene==3.4.3
|
||||
# via dagster-graphql
|
||||
graphql-core==3.2.6
|
||||
# via
|
||||
# gql
|
||||
# graphene
|
||||
# graphql-relay
|
||||
graphql-relay==3.2.0
|
||||
# via graphene
|
||||
grpcio==1.74.0
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
grpcio-health-checking==1.71.2
|
||||
# via dagster
|
||||
h11==0.16.0
|
||||
# via uvicorn
|
||||
httptools==0.6.4
|
||||
# via uvicorn
|
||||
humanfriendly==10.0
|
||||
# via coloredlogs
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
identify==2.6.12
|
||||
# via pre-commit
|
||||
idna==3.10
|
||||
# via
|
||||
# anyio
|
||||
# email-validator
|
||||
# requests
|
||||
# yarl
|
||||
isort==6.0.1
|
||||
# via dev (pyproject.toml)
|
||||
jinja2==3.1.6
|
||||
# via dagster
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
jsonschema==4.25.0
|
||||
# via nbformat
|
||||
jsonschema-specifications==2025.4.1
|
||||
# via jsonschema
|
||||
jupyter-core==5.8.1
|
||||
# via nbformat
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
mako==1.3.10
|
||||
# via alembic
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==3.0.2
|
||||
# via
|
||||
# jinja2
|
||||
# mako
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
multidict==6.6.3
|
||||
# via yarl
|
||||
mypy==1.17.1
|
||||
# via dev (pyproject.toml)
|
||||
mypy-extensions==1.1.0
|
||||
# via
|
||||
# black
|
||||
# mypy
|
||||
nbformat==5.10.4
|
||||
# via nbstripout
|
||||
nbstripout==0.8.1
|
||||
# via dev (pyproject.toml)
|
||||
nodeenv==1.9.1
|
||||
# via pre-commit
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# black
|
||||
# build
|
||||
# dagster-aws
|
||||
# dagster-shared
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-duckdb-pandas
|
||||
# fastparquet
|
||||
# seaborn
|
||||
pathspec==0.12.1
|
||||
# via
|
||||
# black
|
||||
# mypy
|
||||
patito==0.8.3
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
pip==25.2
|
||||
# via pip-tools
|
||||
pip-tools==7.5.0
|
||||
# via dev (pyproject.toml)
|
||||
platformdirs==4.3.8
|
||||
# via
|
||||
# black
|
||||
# jupyter-core
|
||||
# virtualenv
|
||||
polars==1.32.0
|
||||
# via
|
||||
# dagster-polars
|
||||
# patito
|
||||
pre-commit==4.2.0
|
||||
# via dev (pyproject.toml)
|
||||
propcache==0.3.2
|
||||
# via yarl
|
||||
protobuf==5.29.5
|
||||
# via
|
||||
# dagster
|
||||
# grpcio-health-checking
|
||||
psycopg2-binary==2.9.10
|
||||
# via dagster-postgres
|
||||
pyarrow==21.0.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-polars
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# icecream
|
||||
# rich
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pyproject-hooks==1.2.0
|
||||
# via
|
||||
# build
|
||||
# pip-tools
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# graphene
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-dotenv==1.1.1
|
||||
# via
|
||||
# dagster
|
||||
# pydantic-settings
|
||||
# uvicorn
|
||||
pytz==2025.2
|
||||
# via
|
||||
# dagster
|
||||
# pandas
|
||||
pyyaml==6.0.2
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster-shared
|
||||
# pre-commit
|
||||
# uvicorn
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
regex==2025.7.34
|
||||
# via docker-image-py
|
||||
requests==2.32.4
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
# dagster-aws
|
||||
# dagster-graphql
|
||||
# docker
|
||||
# gql
|
||||
# requests-toolbelt
|
||||
requests-toolbelt==1.0.0
|
||||
# via gql
|
||||
rich==14.1.0
|
||||
# via dagster
|
||||
rpds-py==0.26.0
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
ruff==0.12.7
|
||||
# via dev (pyproject.toml)
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
setuptools==80.9.0
|
||||
# via
|
||||
# dagster
|
||||
# pip-tools
|
||||
six==1.17.0
|
||||
# via
|
||||
# dagster
|
||||
# python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anyio
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.42
|
||||
# via
|
||||
# alembic
|
||||
# dagster
|
||||
starlette==0.47.2
|
||||
# via
|
||||
# dagster-graphql
|
||||
# dagster-webserver
|
||||
structlog==25.4.0
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# dagster
|
||||
tabulate==0.9.0
|
||||
# via dagster
|
||||
tomli==2.2.1
|
||||
# via dagster
|
||||
tomlkit==0.13.3
|
||||
# via dagster-shared
|
||||
toposort==1.10
|
||||
# via dagster
|
||||
tqdm==4.67.1
|
||||
# via dagster
|
||||
traitlets==5.14.3
|
||||
# via
|
||||
# jupyter-core
|
||||
# nbformat
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# alembic
|
||||
# anyio
|
||||
# beautifulsoup4
|
||||
# dagster-polars
|
||||
# dagster-shared
|
||||
# graphene
|
||||
# mypy
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# referencing
|
||||
# sqlalchemy
|
||||
# starlette
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
universal-pathlib==0.2.6
|
||||
# via
|
||||
# dagster
|
||||
# dagster-polars
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# docker
|
||||
# requests
|
||||
uvicorn==0.35.0
|
||||
# via dagster-webserver
|
||||
uvloop==0.21.0
|
||||
# via uvicorn
|
||||
virtualenv==20.33.0
|
||||
# via pre-commit
|
||||
watchdog==5.0.3
|
||||
# via dagster
|
||||
watchfiles==1.1.0
|
||||
# via uvicorn
|
||||
websockets==15.0.1
|
||||
# via uvicorn
|
||||
wheel==0.45.1
|
||||
# via pip-tools
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
yarl==1.20.1
|
||||
# via gql
|
||||
@@ -16,6 +16,7 @@ dependencies = [
|
||||
"openpyxl",
|
||||
"pandas",
|
||||
"patito",
|
||||
"polars==1.32.0",
|
||||
"pyarrow",
|
||||
"pydantic[email]",
|
||||
"pydantic-settings",
|
||||
@@ -43,12 +44,12 @@ local = [
|
||||
"ipywidgets"
|
||||
]
|
||||
dagster = [
|
||||
"dagster",
|
||||
"dagster==1.11.4",
|
||||
"dagster-graphql",
|
||||
"dagster-postgres",
|
||||
"dagster-docker",
|
||||
"dagster-aws",
|
||||
"dagster-polars[patito]",
|
||||
"dagster-polars[patito]==0.27.4",
|
||||
"dagster-duckdb",
|
||||
"dagster-duckdb-pandas",
|
||||
"dagit"
|
||||
@@ -65,6 +66,9 @@ weather = [
|
||||
"requests_cache",
|
||||
"retry_requests"
|
||||
]
|
||||
backup = [
|
||||
"paramiko"
|
||||
]
|
||||
other = [
|
||||
# "deltalake>=1.0.0",
|
||||
# "dagster-deltalake-pandas",
|
||||
|
||||
@@ -5,4 +5,5 @@ uv pip compile pyproject.toml --extra=dagster --extra=vinyl > apps/vinyl/require
|
||||
uv pip compile pyproject.toml --extra=dagster --extra=stocks > apps/stocks/requirements.txt
|
||||
uv pip compile pyproject.toml --extra=dagster --extra=tesla > apps/tesla/requirements.txt
|
||||
uv pip compile pyproject.toml --extra=dagster --extra=weather > apps/weather/requirements.txt
|
||||
uv pip compile pyproject.toml --extra=dagster --extra=backup > apps/backup/requirements.txt
|
||||
uv pip compile pyproject.toml --extra=dagster --extra=other > apps/other/requirements.txt
|
||||
|
||||
152
requirements.txt
152
requirements.txt
@@ -1,152 +0,0 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
asttokens==3.0.0
|
||||
# via icecream
|
||||
beautifulsoup4==4.13.4
|
||||
# via dev (pyproject.toml)
|
||||
boto3==1.40.1
|
||||
# via dev (pyproject.toml)
|
||||
botocore==1.40.1
|
||||
# via
|
||||
# boto3
|
||||
# s3fs
|
||||
# s3transfer
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
charset-normalizer==3.4.2
|
||||
# via requests
|
||||
colorama==0.4.6
|
||||
# via icecream
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cramjam==2.11.0
|
||||
# via fastparquet
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dnspython==2.7.0
|
||||
# via email-validator
|
||||
duckdb==1.3.2
|
||||
# via dev (pyproject.toml)
|
||||
email-validator==2.2.0
|
||||
# via pydantic
|
||||
et-xmlfile==2.0.0
|
||||
# via openpyxl
|
||||
executing==2.2.0
|
||||
# via icecream
|
||||
fastparquet==2024.11.0
|
||||
# via dev (pyproject.toml)
|
||||
fonttools==4.59.0
|
||||
# via matplotlib
|
||||
fsspec==2025.7.0
|
||||
# via
|
||||
# fastparquet
|
||||
# s3fs
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via dev (pyproject.toml)
|
||||
icecream==2.1.5
|
||||
# via dev (pyproject.toml)
|
||||
idna==3.10
|
||||
# via
|
||||
# email-validator
|
||||
# requests
|
||||
jmespath==1.0.1
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
kiwisolver==1.4.8
|
||||
# via matplotlib
|
||||
lxml==6.0.0
|
||||
# via dev (pyproject.toml)
|
||||
matplotlib==3.10.5
|
||||
# via seaborn
|
||||
numpy==2.3.2
|
||||
# via
|
||||
# contourpy
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
# pandas
|
||||
# seaborn
|
||||
openpyxl==3.1.5
|
||||
# via dev (pyproject.toml)
|
||||
packaging==25.0
|
||||
# via
|
||||
# fastparquet
|
||||
# matplotlib
|
||||
pandas==2.3.1
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# fastparquet
|
||||
# seaborn
|
||||
patito==0.8.3
|
||||
# via dev (pyproject.toml)
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
polars==1.32.0
|
||||
# via patito
|
||||
pyarrow==21.0.0
|
||||
# via dev (pyproject.toml)
|
||||
pydantic==2.11.7
|
||||
# via
|
||||
# dev (pyproject.toml)
|
||||
# patito
|
||||
# pydantic-settings
|
||||
pydantic-core==2.33.2
|
||||
# via pydantic
|
||||
pydantic-settings==2.10.1
|
||||
# via dev (pyproject.toml)
|
||||
pygments==2.19.2
|
||||
# via icecream
|
||||
pyparsing==3.2.3
|
||||
# via matplotlib
|
||||
pysocks==1.7.1
|
||||
# via requests
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# botocore
|
||||
# matplotlib
|
||||
# pandas
|
||||
python-dotenv==1.1.1
|
||||
# via pydantic-settings
|
||||
pytz==2025.2
|
||||
# via pandas
|
||||
pyyaml==6.0.2
|
||||
# via dev (pyproject.toml)
|
||||
requests==2.32.4
|
||||
# via dev (pyproject.toml)
|
||||
s3fs==0.4.2
|
||||
# via dev (pyproject.toml)
|
||||
s3transfer==0.13.1
|
||||
# via boto3
|
||||
seaborn==0.13.2
|
||||
# via dev (pyproject.toml)
|
||||
six==1.17.0
|
||||
# via python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
soupsieve==2.7
|
||||
# via beautifulsoup4
|
||||
structlog==25.4.0
|
||||
# via dev (pyproject.toml)
|
||||
typing-extensions==4.14.1
|
||||
# via
|
||||
# beautifulsoup4
|
||||
# patito
|
||||
# pydantic
|
||||
# pydantic-core
|
||||
# typing-inspection
|
||||
typing-inspection==0.4.1
|
||||
# via
|
||||
# pydantic
|
||||
# pydantic-settings
|
||||
tzdata==2025.2
|
||||
# via pandas
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# botocore
|
||||
# requests
|
||||
xlsxwriter==3.2.5
|
||||
# via dev (pyproject.toml)
|
||||
@@ -6,6 +6,10 @@ from pydantic import Field, PrivateAttr
|
||||
from upath import UPath
|
||||
|
||||
import dagster as dg
|
||||
from dagster import (
|
||||
InputContext,
|
||||
OutputContext,
|
||||
)
|
||||
|
||||
|
||||
def _process_env_vars(config: dict[str, Any]) -> dict[str, Any]:
|
||||
@@ -60,12 +64,26 @@ class BaseIOManager(dg.ConfigurableIOManager, dg.UPathIOManager, ABC):
|
||||
with path.open("r") as fp:
|
||||
return json.load(fp)
|
||||
|
||||
def get_path_for_partition(
|
||||
self, context: InputContext | OutputContext, path: "UPath", partition: str
|
||||
) -> UPath:
|
||||
"""Use path from metadata when provided."""
|
||||
ic()
|
||||
context_metadata = context.output_metadata or {}
|
||||
ic(context_metadata)
|
||||
|
||||
if "path" in context_metadata:
|
||||
return UPath(*context_metadata["path"].value)
|
||||
return super().get_path_for_partition(context)
|
||||
|
||||
def get_asset_relative_path(
|
||||
self, context: dg.InputContext | dg.OutputContext
|
||||
) -> UPath:
|
||||
"""Get the relative path for the asset based on context metadata."""
|
||||
ic()
|
||||
context_metadata = context.output_metadata or {}
|
||||
ic(context_metadata)
|
||||
|
||||
path_prefix = (
|
||||
context_metadata["path_prefix"].value
|
||||
if "path_prefix" in context_metadata
|
||||
|
||||
@@ -15,6 +15,10 @@ load_from:
|
||||
location_name: weather
|
||||
host: dagster-code-weather
|
||||
port: 4000
|
||||
- grpc_server:
|
||||
location_name: backup
|
||||
host: dagster-code-backup
|
||||
port: 4000
|
||||
- grpc_server:
|
||||
location_name: other
|
||||
host: dagster-code-other
|
||||
|
||||
Reference in New Issue
Block a user