feat: initial project setup
This commit is contained in:
12
.env.example
Normal file
12
.env.example
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# ── PostgreSQL ───────────────────────────────────────────────────────────────
|
||||||
|
POSTGRES_HOST=postgres
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_USER=dagster
|
||||||
|
POSTGRES_PASSWORD=changeme
|
||||||
|
POSTGRES_DB=dagster
|
||||||
|
|
||||||
|
# ── Dagster metadata storage (same postgres instance; keep in sync with the POSTGRES_* values above) ──
|
||||||
|
DAGSTER_POSTGRES_URL=postgresql://dagster:changeme@postgres:5432/dagster
|
||||||
|
|
||||||
|
# ── dbt profile target (overrides profiles.yml env_var defaults) ─────────────
|
||||||
|
DBT_TARGET=dev
|
||||||
27
.gitignore
vendored
Normal file
27
.gitignore
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Environment / secrets
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
.venv/
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
|
||||||
|
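# uv — NOTE(review): uv.lock is normally committed; ignoring it leaves `uv sync --frozen` in the Dockerfile without a lockfile on fresh clones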
|
||||||
|
uv.lock
|
||||||
|
|
||||||
|
# dbt
|
||||||
|
dbt/target/
|
||||||
|
dbt/dbt_packages/
|
||||||
|
dbt/logs/
|
||||||
|
|
||||||
|
# Dagster
|
||||||
|
dagster_home/storage/
|
||||||
|
dagster_home/logs/
|
||||||
|
dagster_home/schedule_logs/
|
||||||
|
dagster_home/compute_logs/
|
||||||
|
|
||||||
|
# Log files (e.g. written by Docker services)
|
||||||
|
*.log
|
||||||
17
Dockerfile
Normal file
17
Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install uv
|
||||||
|
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||||
|
|
||||||
|
# Install dependencies before copying full source (layer caching)
|
||||||
|
COPY pyproject.toml uv.lock* ./
|
||||||
|
RUN uv sync --frozen --no-dev 2>/dev/null || uv sync --no-dev
|
||||||
|
|
||||||
|
# Copy application source
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Make the venv's binaries available on PATH
|
||||||
|
ENV PATH="/app/.venv/bin:$PATH"
|
||||||
|
ENV DAGSTER_HOME=/app/dagster_home
|
||||||
64
README.md
64
README.md
@@ -1 +1,63 @@
|
|||||||
# data-platform
|
# data-platform
|
||||||
|
|
||||||
|
A [Dagster](https://dagster.io/) + [dbt](https://www.getdbt.com/) data platform,
|
||||||
|
managed with [uv](https://github.com/astral-sh/uv) and deployed via Docker Compose.
|
||||||
|
|
||||||
|
## Stack
|
||||||
|
|
||||||
|
| Layer | Tool |
|
||||||
|
|---|---|
|
||||||
|
| Orchestration | Dagster (webserver + daemon) |
|
||||||
|
| Transformation | dbt-core + dbt-postgres |
|
||||||
|
| Storage | PostgreSQL 16 |
|
||||||
|
| Package/venv | uv |
|
||||||
|
| Secrets | `.env` file |
|
||||||
|
|
||||||
|
## Project layout
|
||||||
|
|
||||||
|
```
|
||||||
|
data_platform/ # Dagster Python package (assets, definitions)
|
||||||
|
dbt/ # dbt project (models, seeds, tests)
|
||||||
|
profiles.yml # reads credentials from env vars
|
||||||
|
dagster_home/ # dagster.yaml + workspace.yaml
|
||||||
|
Dockerfile # single image used by both dagster services
|
||||||
|
docker-compose.yaml # postgres + dagster-webserver + dagster-daemon
|
||||||
|
.env.example # copy to .env and fill in credentials
|
||||||
|
pyproject.toml # uv-managed dependencies
|
||||||
|
```
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Install uv (if not already)
|
||||||
|
curl -Lsf https://astral.sh/uv/install.sh | sh
|
||||||
|
|
||||||
|
# 2. Clone the repository (if you have not already), then enter the project directory
|
||||||
|
cd ~/git/data-platform
|
||||||
|
|
||||||
|
# 3. Create your credentials file
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your passwords
|
||||||
|
|
||||||
|
# 4. Install dependencies into a local venv
|
||||||
|
uv sync
|
||||||
|
|
||||||
|
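# 5. Generate the dbt manifest (needed before first run; export the POSTGRES_* variables from .env first, because dbt/profiles.yml has no defaults for user, password and database)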
|
||||||
|
uv run dbt parse --profiles-dir dbt --project-dir dbt
|
||||||
|
|
||||||
|
# 6. Start all services
|
||||||
|
docker compose up -d --build
|
||||||
|
|
||||||
|
# 7. Open the Dagster UI
|
||||||
|
# http://localhost:3000
|
||||||
|
```
|
||||||
|
|
||||||
|
## Local development (without Docker)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv sync
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
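# Run the Dagster UI locally (export the variables from .env first, and use a Postgres host reachable from your machine rather than the Compose service name `postgres`)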
|
||||||
|
DAGSTER_HOME=$PWD/dagster_home dagster dev
|
||||||
|
```
|
||||||
6
dagster_home/dagster.yaml
Normal file
6
dagster_home/dagster.yaml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# Dagster stores run history, event logs, and schedules in PostgreSQL.
|
||||||
|
# Connection URL is read from the DAGSTER_POSTGRES_URL environment variable.
|
||||||
|
storage:
|
||||||
|
postgres:
|
||||||
|
postgres_url:
|
||||||
|
env: DAGSTER_POSTGRES_URL
|
||||||
4
dagster_home/workspace.yaml
Normal file
4
dagster_home/workspace.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
load_from:
|
||||||
|
- python_package:
|
||||||
|
package_name: data_platform
|
||||||
|
attribute: defs
|
||||||
3
data_platform/__init__.py
Normal file
3
data_platform/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from data_platform.definitions import defs
|
||||||
|
|
||||||
|
__all__ = ["defs"]
|
||||||
36
data_platform/definitions.py
Normal file
36
data_platform/definitions.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from dagster import Definitions
|
||||||
|
from dagster_dbt import DbtCliResource, DbtProject, dbt_assets
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# dbt project
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
DBT_PROJECT_DIR = Path(__file__).parent.parent / "dbt"
|
||||||
|
|
||||||
|
dbt_project = DbtProject(project_dir=str(DBT_PROJECT_DIR))
|
||||||
|
|
||||||
|
# Under `dagster dev`, generate/refresh the manifest automatically; other entry points (e.g. the Docker services) expect the manifest to exist already.
|
||||||
|
dbt_project.prepare_if_dev()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# dbt assets – dbt models, seeds and snapshots become Dagster assets (with current dagster-dbt defaults, dbt tests load as asset checks rather than assets)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dbt_assets(manifest=dbt_project.manifest_path)
|
||||||
|
def dbt_project_assets(context, dbt: DbtCliResource):
|
||||||
|
# Invoke `dbt build` through the injected DbtCliResource and re-yield every event the CLI invocation streams back.
yield from dbt.cli(["build"], context=context).stream()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Definitions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
defs = Definitions(
|
||||||
|
assets=[dbt_project_assets],
|
||||||
|
resources={
|
||||||
|
"dbt": DbtCliResource(project_dir=str(DBT_PROJECT_DIR)),
|
||||||
|
},
|
||||||
|
)
|
||||||
22
dbt/dbt_project.yml
Normal file
22
dbt/dbt_project.yml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
name: data_platform
|
||||||
|
version: "1.0.0"
|
||||||
|
profile: data_platform
|
||||||
|
|
||||||
|
model-paths: ["models"]
|
||||||
|
seed-paths: ["seeds"]
|
||||||
|
test-paths: ["tests"]
|
||||||
|
analysis-paths: ["analyses"]
|
||||||
|
macro-paths: ["macros"]
|
||||||
|
snapshot-paths: ["snapshots"]
|
||||||
|
|
||||||
|
target-path: "target"
|
||||||
|
clean-targets:
|
||||||
|
- "target"
|
||||||
|
- "dbt_packages"
|
||||||
|
|
||||||
|
models:
|
||||||
|
data_platform:
|
||||||
|
staging:
|
||||||
|
+materialized: view
|
||||||
|
marts:
|
||||||
|
+materialized: table
|
||||||
12
dbt/models/staging/schema.yml
Normal file
12
dbt/models/staging/schema.yml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: stg_example
|
||||||
|
description: >
|
||||||
|
A placeholder staging model. Replace with your actual source tables.
|
||||||
|
columns:
|
||||||
|
- name: id
|
||||||
|
description: Primary key.
|
||||||
|
tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
5
dbt/models/staging/stg_example.sql
Normal file
5
dbt/models/staging/stg_example.sql
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
-- Placeholder staging model.
|
||||||
|
-- Replace this with your actual source query, e.g.:
|
||||||
|
-- select * from {{ source('my_source', 'my_table') }}
|
||||||
|
|
||||||
|
select 1 as id, 'example' as name
|
||||||
12
dbt/profiles.yml
Normal file
12
dbt/profiles.yml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
data_platform:
|
||||||
|
target: "{{ env_var('DBT_TARGET', 'dev') }}"
|
||||||
|
outputs:
|
||||||
|
dev:
|
||||||
|
type: postgres
|
||||||
|
host: "{{ env_var('POSTGRES_HOST', 'localhost') }}"
|
||||||
|
port: "{{ env_var('POSTGRES_PORT', '5432') | int }}"
|
||||||
|
user: "{{ env_var('POSTGRES_USER') }}"
|
||||||
|
password: "{{ env_var('POSTGRES_PASSWORD') }}"
|
||||||
|
dbname: "{{ env_var('POSTGRES_DB') }}"
|
||||||
|
schema: staging
|
||||||
|
threads: 4
|
||||||
51
docker-compose.yaml
Normal file
51
docker-compose.yaml
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
version: "3.9"
|
||||||
|
|
||||||
|
# Shared config for all dagster services
|
||||||
|
x-dagster: &dagster-common
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
env_file: .env
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
services:
|
||||||
|
|
||||||
|
# Metadata storage and dbt target
|
||||||
|
postgres:
|
||||||
|
image: postgres:16
|
||||||
|
container_name: postgres
|
||||||
|
restart: unless-stopped
|
||||||
|
env_file: .env
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB}
|
||||||
|
volumes:
|
||||||
|
- postgres-data:/var/lib/postgresql/data
|
||||||
|
ports:
|
||||||
|
- "10.0.0.108:5432:5432"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
# Web UI
|
||||||
|
dagster-webserver:
|
||||||
|
<<: *dagster-common
|
||||||
|
container_name: dagster-webserver
|
||||||
|
command: ["dagster-webserver", "-h", "0.0.0.0", "-p", "3000"]
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
|
||||||
|
# Schedules, sensors and run queuing
|
||||||
|
dagster-daemon:
|
||||||
|
<<: *dagster-common
|
||||||
|
container_name: dagster-daemon
|
||||||
|
command: ["dagster-daemon", "run"]
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres-data:
|
||||||
25
pyproject.toml
Normal file
25
pyproject.toml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
[project]
|
||||||
|
name = "data-platform"
|
||||||
|
version = "0.1.0"
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
dependencies = [
|
||||||
|
"dagster",
|
||||||
|
"dagster-webserver",
|
||||||
|
"dagster-postgres",
|
||||||
|
"dagster-dbt",
|
||||||
|
"dbt-core",
|
||||||
|
"dbt-postgres",
|
||||||
|
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
packages = ["data_platform"]
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
dev-dependencies = [
|
||||||
|
"pytest",
|
||||||
|
"dagster-webserver",
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user