inventory of borg backups
This commit is contained in:
11
apps/backup/Dockerfile
Normal file
11
apps/backup/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
FROM dagster-code-backup-base
|
||||||
|
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install --no-install-recommends --yes \
|
||||||
|
borgbackup openssh-client \
|
||||||
|
&& apt-get clean \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
RUN mkdir -p /root/.ssh && chmod 0700 /root/.ssh/
|
||||||
|
COPY --chmod=0600 id_rsa /root/.ssh/
|
||||||
|
ADD --chmod=0600 ssh_config /root/.ssh/config
|
||||||
146
apps/backup/src/assets.py
Normal file
146
apps/backup/src/assets.py
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from datetime import datetime
|
||||||
|
from functools import partial
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
from config import APP, BORG_HOST, BORG_ROOT
|
||||||
|
from partitions import borg_repo_partitions_def, daily_partitions_def
|
||||||
|
from shared.utils import get_partition_keys
|
||||||
|
|
||||||
|
import dagster as dg
|
||||||
|
|
||||||
|
asset = partial(dg.asset, key_prefix=APP)
|
||||||
|
logger = structlog.get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
@asset(
|
||||||
|
partitions_def=dg.MultiPartitionsDefinition(
|
||||||
|
{
|
||||||
|
"date": daily_partitions_def,
|
||||||
|
"repo": borg_repo_partitions_def,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
def borg_archive(context: dg.AssetExecutionContext) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@asset(
|
||||||
|
deps=[borg_archive],
|
||||||
|
partitions_def=dg.MultiPartitionsDefinition(
|
||||||
|
{
|
||||||
|
"date": daily_partitions_def,
|
||||||
|
"repo": borg_repo_partitions_def,
|
||||||
|
}
|
||||||
|
),
|
||||||
|
automation_condition=dg.AutomationCondition.eager(),
|
||||||
|
)
|
||||||
|
def borg_archive_info(context: dg.AssetExecutionContext) -> dg.Output[None]:
|
||||||
|
partition_keys = get_partition_keys(context)
|
||||||
|
ic(partition_keys)
|
||||||
|
|
||||||
|
location = f"ssh://{BORG_HOST}{BORG_ROOT}{partition_keys['repo']}::{partition_keys['date']}"
|
||||||
|
ic(location)
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["borg", "info", "--json", location],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
env={"BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK": "yes"},
|
||||||
|
)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
logger.error("borg list failed", exc_info=e, code=e.returncode)
|
||||||
|
sys.stderr.write("borg list failed\n" + e.stderr)
|
||||||
|
data = json.loads(result.stdout)
|
||||||
|
ic(data)
|
||||||
|
tmp = data["archives"][0]
|
||||||
|
|
||||||
|
def parse_date(date_str, tz: str | None = None) -> dg.MetadataValue.timestamp:
|
||||||
|
return dg.MetadataValue.timestamp(
|
||||||
|
datetime.fromisoformat(date_str).replace(
|
||||||
|
tzinfo=ZoneInfo(tz or os.environ.get("TZ", "CET"))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return dg.Output(
|
||||||
|
None,
|
||||||
|
metadata={
|
||||||
|
"start": parse_date(tmp["start"]),
|
||||||
|
"end": parse_date(tmp["end"]),
|
||||||
|
"duration": dg.MetadataValue.float(tmp["duration"]),
|
||||||
|
"compressed_size": dg.MetadataValue.int(tmp["stats"]["compressed_size"]),
|
||||||
|
"deduplicated_size": dg.MetadataValue.int(
|
||||||
|
tmp["stats"]["deduplicated_size"]
|
||||||
|
),
|
||||||
|
"nfiles": dg.MetadataValue.int(tmp["stats"]["nfiles"]),
|
||||||
|
"original_size": dg.MetadataValue.int(tmp["stats"]["original_size"]),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# now run borg info ssh://shuttle/mnt/yotta/xenon/borg/opt/::2025-07-27 --json and register info
|
||||||
|
|
||||||
|
|
||||||
|
@asset(
|
||||||
|
partitions_def=borg_repo_partitions_def,
|
||||||
|
)
|
||||||
|
def borg_repo(context: dg.AssetExecutionContext) -> None:
|
||||||
|
location = f"ssh://{BORG_HOST}{BORG_ROOT}{context.partition_key}"
|
||||||
|
ic(location)
|
||||||
|
repo = context.partition_key
|
||||||
|
|
||||||
|
# Get Borg backup list
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["borg", "list", "--json", location],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
env={"BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK": "yes"},
|
||||||
|
)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
logger.error("borg list failed", exc_info=e, code=e.returncode)
|
||||||
|
sys.stderr.write("borg list failed\n" + e.stderr)
|
||||||
|
data = json.loads(result.stdout)
|
||||||
|
ic(data)
|
||||||
|
|
||||||
|
for entry in data.get("archives", []):
|
||||||
|
partition = f"{entry['archive']}|{repo}"
|
||||||
|
context.log_event(
|
||||||
|
dg.AssetMaterialization(
|
||||||
|
asset_key=borg_archive.key,
|
||||||
|
partition=partition,
|
||||||
|
metadata={
|
||||||
|
"id": dg.MetadataValue.text(entry["id"]),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# context.
|
||||||
|
|
||||||
|
# snapshots = data.get("archives", [])
|
||||||
|
#
|
||||||
|
# # Find latest backup for this day
|
||||||
|
# match = next(
|
||||||
|
# (s for s in reversed(snapshots)
|
||||||
|
# if datetime.fromisoformat(s["end"]).date() == expected_date),
|
||||||
|
# None
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# if match:
|
||||||
|
# context.log_event(
|
||||||
|
# dg.AssetMaterialization(
|
||||||
|
# asset_key=one.key, partition="2025-07-27"
|
||||||
|
# ) # this works!
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# return {
|
||||||
|
# "name": match["name"],
|
||||||
|
# "end": match["end"],
|
||||||
|
# "size": match.get("size", 0)
|
||||||
|
# }
|
||||||
|
# else:
|
||||||
|
# raise Exception(f"No backup found for {expected_date}")
|
||||||
7
apps/backup/src/config.py
Normal file
7
apps/backup/src/config.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
APP = os.environ.get("APP", Path(__file__).parent.parent.name)
|
||||||
|
|
||||||
|
BORG_HOST = "backup"
|
||||||
|
BORG_ROOT: str = "/mnt/yotta/xenon/borg/"
|
||||||
22
apps/backup/src/definitions.py
Normal file
22
apps/backup/src/definitions.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
import assets
|
||||||
|
import sensors
|
||||||
|
from config import APP
|
||||||
|
from icecream import install
|
||||||
|
|
||||||
|
import dagster as dg
|
||||||
|
|
||||||
|
install()
|
||||||
|
|
||||||
|
definitions = dg.Definitions(
|
||||||
|
assets=[
|
||||||
|
asset.with_attributes(
|
||||||
|
group_names_by_key={asset.key: APP},
|
||||||
|
tags_by_key={asset.key: {"app": APP}},
|
||||||
|
)
|
||||||
|
for asset in dg.load_assets_from_modules([assets])
|
||||||
|
],
|
||||||
|
resources={},
|
||||||
|
jobs=[],
|
||||||
|
schedules=[],
|
||||||
|
sensors=[sensors.borg_repos],
|
||||||
|
)
|
||||||
8
apps/backup/src/partitions.py
Normal file
8
apps/backup/src/partitions.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import dagster as dg
|
||||||
|
|
||||||
|
borg_repo_partitions_def = dg.DynamicPartitionsDefinition(name="borg_repo")
|
||||||
|
daily_partitions_def = dg.DailyPartitionsDefinition(
|
||||||
|
start_date="2025-01-01", end_offset=1, timezone=os.environ.get("TZ", "UTC")
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user