feat: initial project setup

This commit is contained in:
Stijnvandenbroek
2026-03-03 12:18:30 +00:00
parent 62af3114be
commit 7dd520cf41
14 changed files with 295 additions and 1 deletions

12
.env.example Normal file
View File

@@ -0,0 +1,12 @@
# ── PostgreSQL ───────────────────────────────────────────────────────────────
# Host, port, and credentials for the PostgreSQL instance.
# `changeme` is a placeholder: set a real password in your own .env copy.
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_USER=dagster
POSTGRES_PASSWORD=changeme
POSTGRES_DB=dagster
# ── Dagster metadata storage (uses the same postgres instance) ───────────────
# This URL repeats the user, password, host, port, and database set above;
# update it whenever any of those values change.
DAGSTER_POSTGRES_URL=postgresql://dagster:changeme@postgres:5432/dagster
# ── dbt profile target (overrides profiles.yml env_var defaults) ─────────────
DBT_TARGET=dev

27
.gitignore vendored Normal file
View File

@@ -0,0 +1,27 @@
# Environment / secrets
.env
# Python
__pycache__/
*.py[cod]
.venv/
*.egg-info/
dist/
build/
# uv
# uv.lock is deliberately NOT ignored: commit it so that `uv sync --frozen`
# (used by the Dockerfile) installs the same resolved versions everywhere.
# dbt
dbt/target/
dbt/dbt_packages/
dbt/logs/
# Dagster
dagster_home/storage/
dagster_home/logs/
dagster_home/schedule_logs/
dagster_home/compute_logs/
# Docker
*.log

17
Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
# Single image used by both the dagster-webserver and dagster-daemon services.
FROM python:3.12-slim
WORKDIR /app
# Install uv by copying its binary out of the official image.
# NOTE(review): `latest` is a moving tag; pin a uv version for reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
# Install third-party dependencies before copying full source (layer caching).
# --no-install-project: the project's own package is not in the image yet.
# The lockfile is optional (`uv.lock*`), so an unlocked resolve is used ONLY when
# it is absent; a failing locked install fails the build instead of silently
# falling back to a fresh resolution.
COPY pyproject.toml uv.lock* ./
RUN if [ -f uv.lock ]; then \
      uv sync --frozen --no-dev --no-install-project; \
    else \
      uv sync --no-dev --no-install-project; \
    fi
# Copy application source
# NOTE(review): without a .dockerignore this also copies host-only files such as
# .env and .venv/ into the image; confirm they are excluded from the build context.
COPY . .
# Install the project itself now that its source is present.
RUN if [ -f uv.lock ]; then \
      uv sync --frozen --no-dev; \
    else \
      uv sync --no-dev; \
    fi
# Make the venv's binaries available on PATH
ENV PATH="/app/.venv/bin:$PATH"
ENV DAGSTER_HOME=/app/dagster_home

View File

@@ -1 +1,63 @@
# data-platform
A [Dagster](https://dagster.io/) + [dbt](https://www.getdbt.com/) data platform,
managed with [uv](https://github.com/astral-sh/uv) and deployed via Docker Compose.
## Stack
| Layer | Tool |
|---|---|
| Orchestration | Dagster (webserver + daemon) |
| Transformation | dbt-core + dbt-postgres |
| Storage | PostgreSQL 16 |
| Package/venv | uv |
| Secrets | `.env` file |
## Project layout
```
data_platform/        # Dagster Python package (assets, definitions)
dbt/                  # dbt project (models, seeds, tests)
  profiles.yml        # reads credentials from env vars
dagster_home/         # dagster.yaml + workspace.yaml
Dockerfile            # single image used by both dagster services
docker-compose.yaml   # postgres + dagster-webserver + dagster-daemon
.env.example          # copy to .env and fill in credentials
pyproject.toml        # uv-managed dependencies
## Getting started
```bash
# 1. Install uv (if not already)
curl -Lsf https://astral.sh/uv/install.sh | sh
# 2. Enter the project directory (clone the repository first if you have not already)
cd ~/git/data-platform
# 3. Create your credentials file
cp .env.example .env
# Edit .env with your passwords
# 4. Install dependencies into a local venv
uv sync
# 5. Generate the dbt manifest (needed before first run)
uv run dbt parse --profiles-dir dbt --project-dir dbt
# 6. Start all services
docker compose up -d --build
# 7. Open the Dagster UI
# http://localhost:3000
```
## Local development (without Docker)
```bash
uv sync
source .venv/bin/activate
# Run the Dagster UI locally (the workspace file lives in dagster_home/, so pass it explicitly)
DAGSTER_HOME=$PWD/dagster_home dagster dev -w dagster_home/workspace.yaml
```

View File

@@ -0,0 +1,6 @@
# Dagster instance storage configuration.
# Run history, event logs, and schedules are persisted in PostgreSQL; the
# connection URL is taken from the DAGSTER_POSTGRES_URL environment variable.
storage:
  postgres:
    postgres_url:
      env: DAGSTER_POSTGRES_URL

View File

@@ -0,0 +1,4 @@
# Code location for Dagster: the `defs` attribute of the `data_platform`
# Python package.
load_from:
  - python_package:
      package_name: data_platform
      attribute: defs

View File

@@ -0,0 +1,3 @@
# Package entry point: re-export the Dagster `defs` object from
# data_platform.definitions so it can be imported from the package root.
from data_platform.definitions import defs
# Restrict the package's star-exported names to `defs`.
__all__ = ["defs"]

View File

@@ -0,0 +1,36 @@
from pathlib import Path
from dagster import Definitions
from dagster_dbt import DbtCliResource, DbtProject, dbt_assets
# ---------------------------------------------------------------------------
# dbt project location and manifest handling
# ---------------------------------------------------------------------------
# The dbt project is the `dbt` directory that sits next to this package.
DBT_PROJECT_DIR = Path(__file__).parent.parent.joinpath("dbt")
dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
# In a local development run (outside Docker) this generates/refreshes the
# dbt manifest automatically.
dbt_project.prepare_if_dev()
# ---------------------------------------------------------------------------
# dbt assets: Dagster assets derived from the dbt manifest
# ---------------------------------------------------------------------------
@dbt_assets(manifest=dbt_project.manifest_path)
def dbt_project_assets(context, dbt: DbtCliResource):
    """Invoke `dbt build` through the dbt CLI resource and yield the events it streams.

    Args:
        context: Execution context supplied by Dagster; forwarded to the dbt CLI call.
        dbt: The dbt CLI resource registered under the "dbt" resource key.
    """
    build_invocation = dbt.cli(["build"], context=context)
    yield from build_invocation.stream()
# ---------------------------------------------------------------------------
# Definitions: the object loaded by Dagster (assets + resources)
# ---------------------------------------------------------------------------
defs = Definitions(
    # The "dbt" key matches the `dbt` parameter of dbt_project_assets.
    resources=dict(dbt=DbtCliResource(project_dir=str(DBT_PROJECT_DIR))),
    assets=[dbt_project_assets],
)

22
dbt/dbt_project.yml Normal file
View File

@@ -0,0 +1,22 @@
# dbt project configuration.
name: data_platform
version: '1.0.0'
# Connection profile to look up in profiles.yml.
profile: data_platform

# Directory layout, relative to this file.
model-paths:
  - models
seed-paths:
  - seeds
test-paths:
  - tests
analysis-paths:
  - analyses
macro-paths:
  - macros
snapshot-paths:
  - snapshots
target-path: target
clean-targets:
  - target
  - dbt_packages

# Default materialization per model folder.
models:
  data_platform:
    staging:
      +materialized: view
    marts:
      +materialized: table

View File

@@ -0,0 +1,12 @@
# Documentation and column tests for the staging models.
version: 2
models:
  - name: stg_example
    description: >
      A placeholder staging model. Replace with your actual source tables.
    columns:
      # `id` must be present and distinct on every row.
      - name: id
        description: Primary key.
        tests:
          - unique
          - not_null

View File

@@ -0,0 +1,5 @@
{#
    Placeholder staging model.
    Replace this with your actual source query, e.g.:
        select * from {{ source('my_source', 'my_table') }}

    This header is a Jinja comment on purpose: dbt renders Jinja before the SQL
    is parsed, so a source() call inside a SQL `--` comment would still be
    evaluated and fail because `my_source` is not defined.
#}
select
    1 as id,
    'example' as name

12
dbt/profiles.yml Normal file
View File

@@ -0,0 +1,12 @@
# dbt connection profile; every connection setting comes from environment variables.
data_platform:
  # Active output; falls back to `dev` when DBT_TARGET is unset.
  target: "{{ env_var('DBT_TARGET', 'dev') }}"
  outputs:
    dev:
      type: postgres
      threads: 4
      schema: staging
      # Host and port have defaults; user, password, and dbname do not.
      host: "{{ env_var('POSTGRES_HOST', 'localhost') }}"
      port: "{{ env_var('POSTGRES_PORT', '5432') | int }}"
      user: "{{ env_var('POSTGRES_USER') }}"
      password: "{{ env_var('POSTGRES_PASSWORD') }}"
      dbname: "{{ env_var('POSTGRES_DB') }}"

51
docker-compose.yaml Normal file
View File

@@ -0,0 +1,51 @@
# Compose stack: postgres + dagster-webserver + dagster-daemon.
# NOTE: current Docker Compose ignores the top-level `version` key; it is kept
# only for older tooling.
version: "3.9"

# Shared config for all dagster services
x-dagster: &dagster-common
  build:
    context: .
    dockerfile: Dockerfile
  env_file: .env
  depends_on:
    postgres:
      condition: service_healthy
  restart: unless-stopped

services:
  # Metadata storage and dbt target
  postgres:
    image: postgres:16
    container_name: postgres
    restart: unless-stopped
    env_file: .env
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    ports:
      # Host address the database is published on. Override with
      # POSTGRES_BIND_ADDRESS (e.g. 127.0.0.1) on machines that do not own the
      # default address; the default keeps the previously hard-coded value.
      - "${POSTGRES_BIND_ADDRESS:-10.0.0.108}:5432:5432"
    healthcheck:
      # Name the database explicitly so the probe does not fall back to a
      # database named after the user.
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
      interval: 10s
      timeout: 5s
      retries: 5

  # Web UI
  dagster-webserver:
    <<: *dagster-common
    container_name: dagster-webserver
    # workspace.yaml lives in dagster_home/, not in the container's working
    # directory (/app), so it is passed explicitly with -w.
    command:
      - dagster-webserver
      - -h
      - 0.0.0.0
      - -p
      - "3000"
      - -w
      - /app/dagster_home/workspace.yaml
    ports:
      - "3000:3000"

  # Schedules, sensors and run queuing
  dagster-daemon:
    <<: *dagster-common
    container_name: dagster-daemon
    # Same explicit workspace file as the webserver.
    command:
      - dagster-daemon
      - run
      - -w
      - /app/dagster_home/workspace.yaml

volumes:
  postgres-data: {}

25
pyproject.toml Normal file
View File

@@ -0,0 +1,25 @@
[project]
name = "data-platform"
version = "0.1.0"
requires-python = ">=3.11"
# Runtime dependencies: everything the Dagster services need inside the image.
dependencies = [
    "dagster",
    "dagster-webserver",
    "dagster-postgres",
    "dagster-dbt",
    "dbt-core",
    "dbt-postgres",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["data_platform"]

# Development-only dependencies. The standard `[dependency-groups]` table
# replaces the legacy `[tool.uv] dev-dependencies` field; `uv sync` installs the
# `dev` group by default and `--no-dev` skips it.
# `dagster-webserver` is not repeated here because it is already a runtime dependency.
[dependency-groups]
dev = [
    "pytest",
]