diff --git a/dagster/Dockerfile.code b/dagster/Dockerfile.code
new file mode 100755
index 0000000..ac97ab0
--- /dev/null
+++ b/dagster/Dockerfile.code
@@ -0,0 +1,20 @@
+FROM python:3.12-slim
+
+# Checkout and install dagster libraries needed to run the gRPC server
+# exposing your repository to dagit and dagster-daemon, and to load the DagsterInstance
+
+COPY dagster-requirements.txt requirements.txt
+RUN pip install --no-cache-dir uv
+RUN uv pip install --no-cache -r requirements.txt --system
+RUN uv pip install --no-cache polars-lts-cpu --system
+
+# Repository code (repo.py, app/) is bind-mounted into this directory by docker-compose
+WORKDIR /opt/dagster/home
+
+# Run dagster gRPC server on port 4000
+EXPOSE 4000
+
+# CMD allows this to be overridden from run launchers or executors that want
+# to run other commands against your repository
+#CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000", "-f", "repo.py"]
+CMD ["dagster", "code-server", "start", "-h", "0.0.0.0", "-p", "4000", "-f", "repo.py"]
diff --git a/dagster/Dockerfile.system b/dagster/Dockerfile.system
new file mode 100755
index 0000000..a23e6ff
--- /dev/null
+++ b/dagster/Dockerfile.system
@@ -0,0 +1,18 @@
+# Dagster libraries to run both dagit and the dagster-daemon. Does not
+# need to have access to any pipeline code.
+
+FROM python:3.12-slim
+
+COPY dagster-requirements.txt requirements.txt
+RUN pip install --no-cache-dir uv
+RUN uv pip install --no-cache -r requirements.txt --system
+RUN uv pip install --no-cache polars-lts-cpu --system
+
+# Set $DAGSTER_HOME and copy dagster instance and workspace YAML there
+ENV DAGSTER_HOME=/opt/dagster/home/
+
+RUN mkdir -p $DAGSTER_HOME
+
+COPY dagster.yaml workspace.yaml $DAGSTER_HOME
+
+WORKDIR $DAGSTER_HOME
diff --git a/dagster/dagster.yaml b/dagster/dagster.yaml
new file mode 100644
index 0000000..b9d60ba
--- /dev/null
+++ b/dagster/dagster.yaml
@@ -0,0 +1,69 @@
+telemetry:
+  enabled: false
+
+concurrency:
+  default_op_concurrency_limit: 2
+
+run_coordinator:
+  module: dagster.core.run_coordinator
+  class: QueuedRunCoordinator
+
+run_launcher:
+  module: dagster_docker
+  class: DockerRunLauncher
+  config:
+    env_vars:
+      - DAGSTER_POSTGRES_USER
+      - DAGSTER_POSTGRES_PASSWORD
+      - DAGSTER_POSTGRES_DB
+    network: dagster
+    container_kwargs:
+      volumes:
+        - /opt/dagster/src/app/:/opt/dagster/home/app/
+        - /opt/dagster/src/repo.py:/opt/dagster/home/repo.py
+
+        # - /opt/dagster/storage/:/opt/dagster/home/storage/
+        - /opt/dagster/storage/import/:/opt/dagster/home/storage/import/
+        - /opt/dagster/storage/deals/:/opt/dagster/home/storage/deals/
+
+run_storage:
+  module: dagster_postgres.run_storage
+  class: PostgresRunStorage
+  config:
+    postgres_db:
+      hostname: postgresql
+      username:
+        env: DAGSTER_POSTGRES_USER
+      password:
+        env: DAGSTER_POSTGRES_PASSWORD
+      db_name:
+        env: DAGSTER_POSTGRES_DB
+      port: 5432
+
+schedule_storage:
+  module: dagster_postgres.schedule_storage
+  class: PostgresScheduleStorage
+  config:
+    postgres_db:
+      hostname: postgresql
+      username:
+        env: DAGSTER_POSTGRES_USER
+      password:
+        env: DAGSTER_POSTGRES_PASSWORD
+      db_name:
+        env: DAGSTER_POSTGRES_DB
+      port: 5432
+
+event_log_storage:
+  module: dagster_postgres.event_log
+  class: PostgresEventLogStorage
+  config:
+    postgres_db:
+      hostname: postgresql
+      username:
+        env: DAGSTER_POSTGRES_USER
+      password:
+        env: DAGSTER_POSTGRES_PASSWORD
+      db_name:
+        env: DAGSTER_POSTGRES_DB
+      port: 5432
diff --git a/dagster/docker-compose.code.yaml b/dagster/docker-compose.code.yaml
new file mode 100644
index 0000000..6b87332
--- /dev/null
+++ b/dagster/docker-compose.code.yaml
@@ -0,0 +1,49 @@
+x-dagster-env: &dagster_env
+  DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
+  DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+  DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
+  DAGSTER_CURRENT_IMAGE: ${DAGSTER_CURRENT_IMAGE}
+
+x-volumes: &volumes
+  volumes:
+    #- /opt/dagster/storage/:/opt/dagster/home/storage/
+    - /opt/dagster/storage/import/:/opt/dagster/home/storage/import/
+    - /opt/dagster/storage/deals/:/opt/dagster/home/storage/deals/
+    - /opt/dagster/src/app/:/opt/dagster/home/app/
+    - /opt/dagster/src/repo.py:/opt/dagster/home/repo.py
+
+services:
+  # This service runs the gRPC server that loads your user code, in both dagit
+  # and dagster-daemon. By setting DAGSTER_CURRENT_IMAGE to its own image, we tell the
+  # run launcher to use this same image when launching runs in a new container as well.
+  # Multiple containers like this can be deployed separately - each just needs to run on
+  # its own port, and have its own entry in the workspace.yaml file that's loaded by dagit.
+  user_code:
+    build:
+      context: .
+      dockerfile: Dockerfile.code
+    container_name: user_code
+    image: user_code_image
+    restart: always
+    environment:
+      <<: *dagster_env
+    <<: *volumes
+    networks:
+      - dagster
+
+  # Placeholder for a second code location; only started when the `disabled` profile is activated.
+  # NOTE(review): `Dockerfile` is not part of this change - confirm it exists before enabling.
+  other_image:
+    profiles: [ disabled ]
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: other_image
+    image: user_code_image
+    restart: always
+    environment:
+      <<: *dagster_env
+      DAGSTER_CURRENT_IMAGE: something_else
+    <<: *volumes
+    networks:
+      - dagster
diff --git a/dagster/docker-compose.system.yaml b/dagster/docker-compose.system.yaml
new file mode 100644
index 0000000..cd63be3
--- /dev/null
+++ b/dagster/docker-compose.system.yaml
@@ -0,0 +1,90 @@
+x-postgres-env: &postgres_env
+  POSTGRES_USER: ${POSTGRES_USER}
+  POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+  POSTGRES_DB: ${POSTGRES_DB}
+x-aws-env: &aws_env
+  AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
+  AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
+x-dagster-env: &dagster_env
+  DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
+  DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+  DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
+  DAGSTER_CURRENT_IMAGE: ${DAGSTER_CURRENT_IMAGE}
+
+x-volumes: &volumes
+  volumes:
+    - /opt/dagster/dagster.yaml:/opt/dagster/home/dagster.yaml
+    - /opt/dagster/workspace.yaml:/opt/dagster/home/workspace.yaml
+    - /var/run/docker.sock:/var/run/docker.sock
+
+    #- /opt/dagster/storage/:/opt/dagster/home/storage/
+    - /opt/dagster/storage/import/:/opt/dagster/home/storage/import/
+    - /opt/dagster/storage/deals/:/opt/dagster/home/storage/deals/
+
+    - /opt/dagster/src/app/:/opt/dagster/home/app/
+    - /opt/dagster/src/repo.py:/opt/dagster/home/repo.py
+    # - /opt/homebrew/Caskroom/mambaforge/base/envs:/opt/homebrew/Caskroom/mambaforge/base/envs
+
+    # Towel
+    # - /opt/dagster/src/towel.py:/opt/dagster/home/towel.py
+    # - /Users/rik/Seafile/Code/company/navara/Klanten/Eneco/towel/towel:/opt/dagster/home/app/towel
+    # - /Users/rik/Library/Caches/pypoetry/virtualenvs/towel-V7mtCF2c-py3.9:/venv/towel
+
+services:
+  # This service runs the postgres DB used by dagster for run storage, schedule storage,
+  # and event log storage.
+  postgresql:
+    image: postgres:11
+    container_name: postgresql
+    environment:
+      <<: *postgres_env
+    networks:
+      - dagster
+
+  # This service runs dagit, which loads your user code from the user code container.
+  # Since our instance uses the QueuedRunCoordinator, any runs submitted from dagit will be put on
+  # a queue and later dequeued and launched by dagster-daemon.
+  dagit:
+    build:
+      context: .
+      dockerfile: Dockerfile.system
+    entrypoint:
+      - dagster-webserver
+      - -h
+      - "0.0.0.0"
+      - -p
+      - "3000"
+      - -w
+      - workspace.yaml
+    container_name: dagit
+    expose:
+      - "3000"
+    ports:
+      - "3000:3000"
+    environment:
+      <<: *dagster_env
+    <<: *volumes
+    networks:
+      - dagster
+    depends_on:
+      - postgresql
+      - user_code
+
+  # This service runs the dagster-daemon process, which is responsible for taking runs
+  # off of the queue and launching them, as well as creating runs from schedules or sensors.
+  daemon:
+    build:
+      context: .
+      dockerfile: Dockerfile.system
+    entrypoint:
+      - dagster-daemon
+      - run
+    container_name: daemon
+    restart: on-failure
+    environment:
+      <<: [ *dagster_env, *aws_env ]
+    <<: *volumes
+    networks:
+      - dagster
+    depends_on:
+      - postgresql
diff --git a/dagster/docker-compose.yaml b/dagster/docker-compose.yaml
new file mode 100644
index 0000000..421ce12
--- /dev/null
+++ b/dagster/docker-compose.yaml
@@ -0,0 +1,8 @@
+networks:
+  dagster:
+    driver: bridge
+    name: dagster
+
+include:
+  - docker-compose.system.yaml
+  - docker-compose.code.yaml
diff --git a/dagster/pyproject.toml b/dagster/pyproject.toml
new file mode 100755
index 0000000..3d48deb
--- /dev/null
+++ b/dagster/pyproject.toml
@@ -0,0 +1,75 @@
+[project]
+requires-python = "==3.12.*"
+name = "dev"
+authors = [
+    { name = "Rik Veenboer", email = "rik.veenboer@gmail.com" }
+]
+version = "0.1.0"
+dependencies = [
+    "fastapi",
+    "gitpython",
+    "kubernetes",
+    "matplotlib",
+    "seaborn",
+    "openpyxl",
+    "xlsxwriter",
+    "pandas",
+    "pyarrow",
+    "pydantic[email]",
+    "pydantic-settings",
+    "pyyaml",
+    "requests",
+    "s3fs[boto3]",
+    "structlog",
+    "uvicorn",
+    "duckdb",
+    "geopandas",
+    "lxml",
+    "networkx",
+    "Pint",
+    "Pint-Pandas",
+    "boto3",
+    "influxdb-client",
+    "requests[socks]",
+    "beautifulsoup4",
+    "fastparquet",
+    "icecream"
+]
+
+[project.optional-dependencies]
+dev = [
+    "black",
+    "isort",
+    "nbstripout",
+    "pip-tools",
+    "pre-commit",
+    "ruff",
+    "mypy"
+]
+local = [
+    "ipykernel",
+    "ipywidgets"
+]
+dagster = [
+    "dagster",
+    "dagster-graphql",
+    "dagster-postgres",
+    "dagster-docker",
+    "dagster-aws",
+    "dagster-polars",
+    "dagster-duckdb",
+    "dagster-duckdb-pandas",
+    "dagit"
+]
+
+[tool.poetry]
+name = "dev"
+version = "0.1.0"
+description = ""
+authors = ["Rik Veenboer <rik.veenboer@gmail.com>"]
+
+[tool.poetry.dependencies]
+seven = "^1.0.0"
+
+[tool.ruff]
+builtins = ["ic"]
diff --git a/dagster/workspace.yaml b/dagster/workspace.yaml
new file mode 100644
index 0000000..47124fd
--- /dev/null
+++ b/dagster/workspace.yaml
@@ -0,0 +1,4 @@
+load_from:
+  - grpc_server:
+      host: user_code
+      port: 4000