add some dagster files

This commit is contained in:
2024-11-14 14:13:49 +01:00
parent 384bf4c4b8
commit ddc3c589ed
8 changed files with 331 additions and 0 deletions

20
dagster/Dockerfile.code Executable file
View File

@@ -0,0 +1,20 @@
FROM python:3.12-slim

# Install the Dagster libraries needed to run the code server that exposes
# this repository to the webserver (dagit) and dagster-daemon, and to load
# the DagsterInstance.
COPY dagster-requirements.txt requirements.txt

# --no-cache-dir (pip) and --no-cache (uv) keep the installers' download
# caches out of the image layers; they are never reused at runtime and only
# add size.
RUN pip install --no-cache-dir uv
RUN uv pip install --no-cache --system -r requirements.txt
# polars-lts-cpu is installed on top of the pinned requirements.
RUN uv pip install --no-cache --system polars-lts-cpu

# No repository code is copied into the image: repo.py (referenced by CMD
# below) must be present in this directory at runtime, e.g. via a bind mount.
WORKDIR /opt/dagster/home

# The code server listens on port 4000 (documentation only; does not publish).
EXPOSE 4000

# CMD (rather than ENTRYPOINT) allows this to be overridden by run launchers
# or executors that want to run other commands against the repository.
# Previous gRPC entry point, kept for reference:
#CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000", "-f", "repo.py"]
CMD ["dagster", "code-server", "start", "-h", "0.0.0.0", "-p", "4000", "-f", "repo.py"]

18
dagster/Dockerfile.system Executable file
View File

@@ -0,0 +1,18 @@
# Dagster libraries to run both the webserver (dagit) and the dagster-daemon.
# Does not need to have access to any pipeline code.
FROM python:3.12-slim

COPY dagster-requirements.txt requirements.txt

# --no-cache-dir (pip) and --no-cache (uv) keep the installers' download
# caches out of the image layers.
RUN pip install --no-cache-dir uv
RUN uv pip install --no-cache --system -r requirements.txt
RUN uv pip install --no-cache --system polars-lts-cpu

# Set $DAGSTER_HOME and place the instance and workspace YAML there.
ENV DAGSTER_HOME=/opt/dagster/home/

# WORKDIR creates the directory when it does not exist, so a separate
# `RUN mkdir -p` layer is unnecessary.
WORKDIR $DAGSTER_HOME
COPY dagster.yaml workspace.yaml ./

69
dagster/dagster.yaml Normal file
View File

@@ -0,0 +1,69 @@
# Dagster instance configuration.

telemetry:
  enabled: false

concurrency:
  default_op_concurrency_limit: 2

# Runs are queued by the coordinator and started by the launcher below.
run_coordinator:
  module: dagster.core.run_coordinator
  class: QueuedRunCoordinator

# Each run is launched in its own Docker container.
run_launcher:
  module: dagster_docker
  class: DockerRunLauncher
  config:
    # Environment variables listed by name for the launched run containers.
    env_vars:
      - DAGSTER_POSTGRES_USER
      - DAGSTER_POSTGRES_PASSWORD
      - DAGSTER_POSTGRES_DB
    network: dagster
    container_kwargs:
      # Host paths bind-mounted into every run container.
      volumes:
        - /opt/dagster/src/app/:/opt/dagster/home/app/
        - /opt/dagster/src/repo.py:/opt/dagster/home/repo.py
        # - /opt/dagster/storage/:/opt/dagster/home/storage/
        - /opt/dagster/storage/import/:/opt/dagster/home/storage/import/
        - /opt/dagster/storage/deals/:/opt/dagster/home/storage/deals/

# Run, schedule and event-log storage all use the same Postgres database;
# credentials are read from the environment.
run_storage:
  module: dagster_postgres.run_storage
  class: PostgresRunStorage
  config:
    postgres_db:
      hostname: postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

schedule_storage:
  module: dagster_postgres.schedule_storage
  class: PostgresScheduleStorage
  config:
    postgres_db:
      hostname: postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

event_log_storage:
  module: dagster_postgres.event_log
  class: PostgresEventLogStorage
  config:
    postgres_db:
      hostname: postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

View File

@@ -0,0 +1,47 @@
# Environment shared by the code-location services; values come from the
# compose environment / .env file.
x-dagster-env: &dagster_env
  DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
  DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
  DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
  DAGSTER_CURRENT_IMAGE: ${DAGSTER_CURRENT_IMAGE}

# Bind mounts shared by the code-location services.
x-volumes: &volumes
  volumes:
    # - /opt/dagster/storage/:/opt/dagster/home/storage/
    - /opt/dagster/storage/import/:/opt/dagster/home/storage/import/
    - /opt/dagster/storage/deals/:/opt/dagster/home/storage/deals/
    - /opt/dagster/src/app/:/opt/dagster/home/app/
    - /opt/dagster/src/repo.py:/opt/dagster/home/repo.py

services:
  # Runs the gRPC server that loads the user code for both dagit and
  # dagster-daemon. Setting DAGSTER_CURRENT_IMAGE to this service's own image
  # tells the run launcher to use the same image when launching runs in a new
  # container. Several such containers can be deployed side by side; each
  # needs its own port and its own entry in the workspace.yaml loaded by dagit.
  user_code:
    image: user_code_image
    container_name: user_code
    build:
      context: .
      dockerfile: Dockerfile.code
    restart: always
    environment:
      <<: *dagster_env
    <<: *volumes
    networks:
      - dagster

  # Template for an additional code location; only started when the
  # "disabled" profile is explicitly enabled.
  # NOTE(review): this service reuses the `user_code_image` tag while building
  # from a different Dockerfile - confirm that is intended before enabling it.
  other_image:
    profiles:
      - disabled
    image: user_code_image
    container_name: other_image
    build:
      context: .
      dockerfile: Dockerfile
    restart: always
    environment:
      <<: *dagster_env
      DAGSTER_CURRENT_IMAGE: something_else
    <<: *volumes
    networks:
      - dagster

View File

@@ -0,0 +1,90 @@
# Reusable environment fragments; values come from the compose environment /
# .env file.
x-postgres-env: &postgres_env
  POSTGRES_USER: ${POSTGRES_USER}
  POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
  POSTGRES_DB: ${POSTGRES_DB}

x-aws-env: &aws_env
  AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
  AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}

x-dagster-env: &dagster_env
  DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
  DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
  DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
  DAGSTER_CURRENT_IMAGE: ${DAGSTER_CURRENT_IMAGE}

# Bind mounts shared by dagit and the daemon: instance/workspace config, the
# Docker socket, and the storage and source directories.
x-volumes: &volumes
  volumes:
    - /opt/dagster/dagster.yaml:/opt/dagster/home/dagster.yaml
    - /opt/dagster/workspace.yaml:/opt/dagster/home/workspace.yaml
    - /var/run/docker.sock:/var/run/docker.sock
    # - /opt/dagster/storage/:/opt/dagster/home/storage/
    - /opt/dagster/storage/import/:/opt/dagster/home/storage/import/
    - /opt/dagster/storage/deals/:/opt/dagster/home/storage/deals/
    - /opt/dagster/src/app/:/opt/dagster/home/app/
    - /opt/dagster/src/repo.py:/opt/dagster/home/repo.py
    # - /opt/homebrew/Caskroom/mambaforge/base/envs:/opt/homebrew/Caskroom/mambaforge/base/envs
    # Towel
    # - /opt/dagster/src/towel.py:/opt/dagster/home/towel.py
    # - /Users/rik/Seafile/Code/company/navara/Klanten/Eneco/towel/towel:/opt/dagster/home/app/towel
    # - /Users/rik/Library/Caches/pypoetry/virtualenvs/towel-V7mtCF2c-py3.9:/venv/towel

services:
  # Postgres database used by Dagster for run storage, schedule storage and
  # event log storage.
  postgresql:
    image: postgres:11
    container_name: postgresql
    environment:
      <<: *postgres_env
    networks:
      - dagster

  # Runs dagit (dagster-webserver), which loads the user code from the user
  # code container. Because the instance uses the QueuedRunCoordinator, runs
  # submitted from dagit are queued and later dequeued and launched by
  # dagster-daemon.
  dagit:
    container_name: dagit
    build:
      context: .
      dockerfile: Dockerfile.system
    entrypoint:
      - dagster-webserver
      - -h
      - "0.0.0.0"
      - -p
      - "3000"
      - -w
      - workspace.yaml
    expose:
      - "3000"
    ports:
      - "3000:3000"
    environment:
      <<: *dagster_env
    <<: *volumes
    networks:
      - dagster
    depends_on:
      - postgresql
      - user_code

  # Runs the dagster-daemon process, which takes runs off the queue and
  # launches them, and creates runs from schedules or sensors.
  daemon:
    container_name: daemon
    build:
      context: .
      dockerfile: Dockerfile.system
    entrypoint:
      - dagster-daemon
      - run
    restart: on-failure
    environment:
      <<: [ *dagster_env, *aws_env ]
    <<: *volumes
    networks:
      - dagster
    depends_on:
      - postgresql

View File

@@ -0,0 +1,8 @@
# Top-level compose file: declares the bridge network named "dagster" and
# includes the system and code-location compose files.
include:
  - docker-compose.system.yaml
  - docker-compose.code.yaml

networks:
  dagster:
    name: dagster
    driver: bridge

75
dagster/pyproject.toml Executable file
View File

@@ -0,0 +1,75 @@
[project]
name = "dev"
version = "0.1.0"
# "==3.12" matches only 3.12.0 under PEP 440 version matching, which rejects
# every 3.12.x patch interpreter; the wildcard form accepts the whole series.
requires-python = "==3.12.*"
authors = [
    { name = "Rik Veenboer", email = "rik.veenboer@gmail.com" },
]
dependencies = [
    "fastapi",
    "gitpython",
    "kubernetes",
    "matplotlib",
    "seaborn",
    "openpyxl",
    "xlsxwriter",
    "pandas",
    "pyarrow",
    "pydantic[email]",
    "pydantic-settings",
    "pyyaml",
    # Listed once with the socks extra; a separate plain "requests" entry
    # would be redundant.
    "requests[socks]",
    "s3fs[boto3]",
    "structlog",
    "uvicorn",
    "duckdb",
    "geopandas",
    "lxml",
    "networkx",
    "Pint",
    "Pint-Pandas",
    "boto3",
    "influxdb-client",
    "beautifulsoup4",
    "fastparquet",
    "icecream",
]
# Optional dependency groups ("extras").
[project.optional-dependencies]
# Formatting, linting, type checking and related developer tooling.
dev = [
    "black",
    "isort",
    "mypy",
    "nbstripout",
    "pip-tools",
    "pre-commit",
    "ruff",
]
# Notebook support.
local = [
    "ipykernel",
    "ipywidgets",
]
# Dagster core plus the integrations listed here.
dagster = [
    "dagit",
    "dagster",
    "dagster-aws",
    "dagster-docker",
    "dagster-duckdb",
    "dagster-duckdb-pandas",
    "dagster-graphql",
    "dagster-polars",
    "dagster-postgres",
]
# Poetry metadata; name and version match the [project] table.
[tool.poetry]
name = "dev"
version = "0.1.0"
authors = ["Rik Veenboer <rik.veenboer@gmail.com>"]
description = ""

[tool.poetry.dependencies]
seven = "^1.0.0"

# Have ruff treat `ic` as a builtin name.
[tool.ruff]
builtins = ["ic"]

4
dagster/workspace.yaml Normal file
View File

@@ -0,0 +1,4 @@
# Code locations to load: a single gRPC server reached at host `user_code`
# on port 4000.
load_from:
  - grpc_server:
      host: user_code
      port: 4000