feat: initial project setup
This commit is contained in:
12
.env.example
Normal file
12
.env.example
Normal file
@@ -0,0 +1,12 @@
|
||||
# ── PostgreSQL ───────────────────────────────────────────────────────────────
|
||||
POSTGRES_HOST=postgres
|
||||
POSTGRES_PORT=5432
|
||||
POSTGRES_USER=dagster
|
||||
POSTGRES_PASSWORD=changeme
|
||||
POSTGRES_DB=dagster
|
||||
|
||||
# ── Dagster metadata storage (uses the same postgres instance) ───────────────
|
||||
DAGSTER_POSTGRES_URL=postgresql://dagster:changeme@postgres:5432/dagster
|
||||
|
||||
# ── dbt profile target (overrides profiles.yml env_var defaults) ─────────────
|
||||
DBT_TARGET=dev
|
||||
27
.gitignore
vendored
Normal file
27
.gitignore
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# Environment / secrets
|
||||
.env
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
.venv/
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
|
||||
# uv
|
||||
uv.lock
|
||||
|
||||
# dbt
|
||||
dbt/target/
|
||||
dbt/dbt_packages/
|
||||
dbt/logs/
|
||||
|
||||
# Dagster
|
||||
dagster_home/storage/
|
||||
dagster_home/logs/
|
||||
dagster_home/schedule_logs/
|
||||
dagster_home/compute_logs/
|
||||
|
||||
# Log files
|
||||
*.log
|
||||
17
Dockerfile
Normal file
17
Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install uv
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
|
||||
# Install dependencies before copying full source (layer caching)
|
||||
COPY pyproject.toml uv.lock* ./
|
||||
RUN uv sync --frozen --no-dev 2>/dev/null || uv sync --no-dev
|
||||
|
||||
# Copy application source
|
||||
COPY . .
|
||||
|
||||
# Make the venv's binaries available on PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
ENV DAGSTER_HOME=/app/dagster_home
|
||||
64
README.md
64
README.md
@@ -1 +1,63 @@
|
||||
# data-platform
|
||||
# data-platform
|
||||
|
||||
A [Dagster](https://dagster.io/) + [dbt](https://www.getdbt.com/) data platform,
|
||||
managed with [uv](https://github.com/astral-sh/uv) and deployed via Docker Compose.
|
||||
|
||||
## Stack
|
||||
|
||||
| Layer | Tool |
|
||||
|---|---|
|
||||
| Orchestration | Dagster (webserver + daemon) |
|
||||
| Transformation | dbt-core + dbt-postgres |
|
||||
| Storage | PostgreSQL 16 |
|
||||
| Package/venv | uv |
|
||||
| Secrets | `.env` file |
|
||||
|
||||
## Project layout
|
||||
|
||||
```
|
||||
data_platform/ # Dagster Python package (assets, definitions)
|
||||
dbt/ # dbt project (models, seeds, tests)
|
||||
profiles.yml # reads credentials from env vars
|
||||
dagster_home/ # dagster.yaml + workspace.yaml
|
||||
Dockerfile # single image used by both dagster services
|
||||
docker-compose.yaml # postgres + dagster-webserver + dagster-daemon
|
||||
.env.example # copy to .env and fill in credentials
|
||||
pyproject.toml # uv-managed dependencies
|
||||
```
|
||||
|
||||
## Getting started
|
||||
|
||||
```bash
|
||||
# 1. Install uv (if not already)
|
||||
curl -Lsf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# 2. Enter the project directory (clone the repository first if you have not already)
|
||||
cd ~/git/data-platform
|
||||
|
||||
# 3. Create your credentials file
|
||||
cp .env.example .env
|
||||
# Edit .env with your passwords (change both POSTGRES_PASSWORD and the password embedded in DAGSTER_POSTGRES_URL)
|
||||
|
||||
# 4. Install dependencies into a local venv
|
||||
uv sync
|
||||
|
||||
# 5. Generate the dbt manifest (needed before first run)
|
||||
uv run dbt parse --profiles-dir dbt --project-dir dbt
|
||||
|
||||
# 6. Start all services
|
||||
docker compose up -d --build
|
||||
|
||||
# 7. Open the Dagster UI
|
||||
# http://localhost:3000
|
||||
```
|
||||
|
||||
## Local development (without Docker)
|
||||
|
||||
```bash
|
||||
uv sync
|
||||
source .venv/bin/activate
|
||||
|
||||
# Run the Dagster UI locally
|
||||
DAGSTER_HOME=$PWD/dagster_home dagster dev
|
||||
```
|
||||
6
dagster_home/dagster.yaml
Normal file
6
dagster_home/dagster.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# Dagster stores run history, event logs, and schedules in PostgreSQL.
|
||||
# Connection URL is read from the DAGSTER_POSTGRES_URL environment variable.
|
||||
storage:
|
||||
postgres:
|
||||
postgres_url:
|
||||
env: DAGSTER_POSTGRES_URL
|
||||
4
dagster_home/workspace.yaml
Normal file
4
dagster_home/workspace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
load_from:
|
||||
- python_package:
|
||||
package_name: data_platform
|
||||
attribute: defs
|
||||
3
data_platform/__init__.py
Normal file
3
data_platform/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from data_platform.definitions import defs
|
||||
|
||||
__all__ = ["defs"]
|
||||
36
data_platform/definitions.py
Normal file
36
data_platform/definitions.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from pathlib import Path
|
||||
|
||||
from dagster import Definitions
|
||||
from dagster_dbt import DbtCliResource, DbtProject, dbt_assets
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# dbt project
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# The dbt project lives in the "dbt" directory, one level above this package.
DBT_PROJECT_DIR = Path(__file__).parent.parent.joinpath("dbt")

# Handle to the dbt project; its manifest path is consumed by the asset definition below.
dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))

# In local development (e.g. `dagster dev` outside Docker), generate/refresh
# the dbt manifest automatically so it does not have to be built by hand.
dbt_project.prepare_if_dev()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# dbt assets – every dbt model/seed/snapshot becomes a Dagster asset (dbt tests are surfaced as asset checks)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dbt_assets(manifest=dbt_project.manifest_path)
def dbt_project_assets(context, dbt: DbtCliResource):
    """Run ``dbt build`` for the project and relay its events to Dagster.

    Args:
        context: Execution context supplied by Dagster; forwarded to the
            dbt CLI invocation.
        dbt: The dbt CLI resource registered under the ``"dbt"`` resource key.

    Yields:
        Each event produced by streaming the ``dbt build`` invocation.
    """
    build_invocation = dbt.cli(["build"], context=context)
    yield from build_invocation.stream()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Definitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Top-level Dagster definitions object. dagster_home/workspace.yaml loads the
# `defs` attribute of the `data_platform` package, which re-exports this name.
defs = Definitions(
    resources={"dbt": DbtCliResource(project_dir=str(DBT_PROJECT_DIR))},
    assets=[dbt_project_assets],
)
|
||||
22
dbt/dbt_project.yml
Normal file
22
dbt/dbt_project.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
name: data_platform
|
||||
version: "1.0.0"
|
||||
profile: data_platform
|
||||
|
||||
model-paths: ["models"]
|
||||
seed-paths: ["seeds"]
|
||||
test-paths: ["tests"]
|
||||
analysis-paths: ["analyses"]
|
||||
macro-paths: ["macros"]
|
||||
snapshot-paths: ["snapshots"]
|
||||
|
||||
target-path: "target"
|
||||
clean-targets:
|
||||
- "target"
|
||||
- "dbt_packages"
|
||||
|
||||
models:
|
||||
data_platform:
|
||||
staging:
|
||||
+materialized: view
|
||||
marts:
|
||||
+materialized: table
|
||||
12
dbt/models/staging/schema.yml
Normal file
12
dbt/models/staging/schema.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: stg_example
|
||||
description: >
|
||||
A placeholder staging model. Replace with your actual source tables.
|
||||
columns:
|
||||
- name: id
|
||||
description: Primary key.
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
5
dbt/models/staging/stg_example.sql
Normal file
5
dbt/models/staging/stg_example.sql
Normal file
@@ -0,0 +1,5 @@
|
||||
-- Placeholder staging model.
|
||||
-- Replace this with your actual source query, e.g.:
|
||||
-- select * from {{ source('my_source', 'my_table') }}
|
||||
|
||||
select 1 as id, 'example' as name
|
||||
12
dbt/profiles.yml
Normal file
12
dbt/profiles.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
data_platform:
|
||||
target: "{{ env_var('DBT_TARGET', 'dev') }}"
|
||||
outputs:
|
||||
dev:
|
||||
type: postgres
|
||||
host: "{{ env_var('POSTGRES_HOST', 'localhost') }}"
|
||||
port: "{{ env_var('POSTGRES_PORT', '5432') | int }}"
|
||||
user: "{{ env_var('POSTGRES_USER') }}"
|
||||
password: "{{ env_var('POSTGRES_PASSWORD') }}"
|
||||
dbname: "{{ env_var('POSTGRES_DB') }}"
|
||||
schema: staging
|
||||
threads: 4
|
||||
51
docker-compose.yaml
Normal file
51
docker-compose.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
version: "3.9"
|
||||
|
||||
# Shared config for all dagster services
|
||||
x-dagster: &dagster-common
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
env_file: .env
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
|
||||
services:
|
||||
|
||||
# Metadata storage and dbt target
|
||||
postgres:
|
||||
image: postgres:16
|
||||
container_name: postgres
|
||||
restart: unless-stopped
|
||||
env_file: .env
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
volumes:
|
||||
- postgres-data:/var/lib/postgresql/data
|
||||
ports:
|
||||
- "10.0.0.108:5432:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# Web UI
|
||||
dagster-webserver:
|
||||
<<: *dagster-common
|
||||
container_name: dagster-webserver
|
||||
command: ["dagster-webserver", "-h", "0.0.0.0", "-p", "3000"]
|
||||
ports:
|
||||
- "3000:3000"
|
||||
|
||||
# Schedules, sensors and run queuing
|
||||
dagster-daemon:
|
||||
<<: *dagster-common
|
||||
container_name: dagster-daemon
|
||||
command: ["dagster-daemon", "run"]
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
25
pyproject.toml
Normal file
25
pyproject.toml
Normal file
@@ -0,0 +1,25 @@
|
||||
[project]
|
||||
name = "data-platform"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"dagster",
|
||||
"dagster-webserver",
|
||||
"dagster-postgres",
|
||||
"dagster-dbt",
|
||||
"dbt-core",
|
||||
"dbt-postgres",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["data_platform"]
|
||||
|
||||
[tool.uv]
|
||||
dev-dependencies = [
|
||||
"pytest",
|
||||
"dagster-webserver",
|
||||
]
|
||||
Reference in New Issue
Block a user