Files
homelab-infra/ops/restore-tests/hetzner-snapshot-restore-test.sh
T
Micha 3e9c12eb75 Add Hetzner Storage Box snapshot restore test
Make the off-site snapshot protection a repeatable, monitored proof (DECISIONS 2026-06-11/-23): a read-only restore-test that lists .zfs/snapshot on the Storage Box, checks retention and newest-snapshot age, and SFTP-fetches one small file from the newest snapshot (size + SHA256). Connection is derived from the borg-ui repo URL and runs via docker exec borg-ui; no secret in the script, no write access. Wired into the run-restore-checks.sh dispatcher; runbook documents the pending one-time live validation.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 12:18:44 +02:00

188 lines
7.4 KiB
Bash
Executable File

#!/bin/bash
set -euo pipefail
# Hetzner Storage Box Snapshot Restore Test
#
# Belegt, dass der Off-site-Schutz wirklich greift. Append-only ist bewusst NICHT
# umgesetzt (DECISIONS 2026-06-01); der Schutz ist snapshot-basiert (DECISIONS
# 2026-06-11): operative Borg-Creds koennen weiter prune/compact, die ZFS-
# Snapshots der Storage Box sind hostseitig aber nicht loeschbar. Dieser Test
# macht den am 2026-06-23 manuell gefuehrten Beweis wiederholbar und ueberwachbar.
#
# Scope (READ-ONLY gegen die Storage Box, ueber den borg-ui-Container):
# 1. .zfs/snapshot/ listen -> Anzahl + neuesten Snapshot bestimmen (Retention)
# 2. Alter des neuesten Snapshots aus dem Namen pruefen (Automatic-<ISO>)
# 3. eine kleine Datei (Borg-Repo `README`) aus dem neuesten Snapshot per SFTP
# in den Container nach /tmp holen, Groesse + SHA256 pruefen, danach loeschen
# 4. Report nach /mnt/user/backups/restore-reports/
#
# KEIN Schreibzugriff auf die Box, kein borg prune/compact, keine produktiven Pfade.
#
# Verbindung wird aus der in borg-ui konfigurierten Borg-Repo-URL abgeleitet
# (kein Secret im Skript). SSH-Key + known_hosts liegen bereits im borg-ui-
# Container und werden via BORG_RSH-Konvention genutzt.
# Locate this script's directory so the shared helper library is found no
# matter which working directory the caller uses.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/common.sh"

#######################################
# Abort unless the variable named by $1 holds a non-negative decimal integer.
# Arguments: $1 - name of the variable to check (read via indirection)
# Outputs:   an error message on stderr when the value is rejected
# Exits:     1 on an empty or non-numeric value
# Why: the thresholds are later compared with `[ ... -lt/-gt ... ]`. A
# non-numeric value makes that test error out and evaluate as false, which
# would silently skip the check and still let the run report SUCCESS.
#######################################
require_uint_setting() {
  local setting_name="$1"
  local setting_value="${!setting_name-}"
  case "$setting_value" in
    '' | *[!0-9]*)
      echo "Invalid value for $setting_name: '$setting_value' (expected a non-negative integer)" >&2
      exit 1
      ;;
  esac
}

# Command line: the only accepted flag is --what-if; anything else is an error.
WHATIF=0
for arg in "$@"; do
  case "$arg" in
    --what-if) WHATIF=1 ;;
    *) echo "Unknown argument: $arg" >&2; exit 1 ;;
  esac
done

REPORT_ROOT="/mnt/user/backups/restore-reports"
REPORT_FILE="$REPORT_ROOT/hetzner-snapshot-$(date +%F).md"

# Expected values = target state per DECISIONS 2026-06-11 (daily snapshots,
# 7-day retention). Every setting below can be overridden via the environment.
MIN_SNAPSHOTS="${MIN_SNAPSHOTS:-5}"
MAX_SNAPSHOT_AGE_HOURS="${MAX_SNAPSHOT_AGE_HOURS:-48}"
SNAPSHOT_DIR="${SNAPSHOT_DIR:-.zfs/snapshot}"
PROBE_FILE="${PROBE_FILE:-README}" # every Borg repo root contains README + config
SNAPSHOT_NAME_GLOB="${SNAPSHOT_NAME_GLOB:-Automatic-}"
# Extra ssh/sftp options; the default uses borg-ui's known_hosts like BORG_RSH.
SNAPSHOT_SSH_OPTS="${SNAPSHOT_SSH_OPTS:--o UserKnownHostsFile=/data/known_hosts -o StrictHostKeyChecking=yes -o BatchMode=yes}"
PROBE_TMP_DIR="${PROBE_TMP_DIR:-/tmp/hetzner-snapshot-probe}"

# Reject malformed numeric overrides up front instead of skipping checks later.
require_uint_setting MIN_SNAPSHOTS
require_uint_setting MAX_SNAPSHOT_AGE_HOURS
# What-if mode: print the effective settings and the planned scope, then exit
# with status 0 without running any of the checks further down.
case "$WHATIF" in
  1)
    printf '%s\n' \
      "Hetzner Storage Box snapshot restore test" \
      "Mode: WhatIf" \
      "Container: $BORG_CONTAINER" \
      "Snapshot dir (rel. login home): $SNAPSHOT_DIR" \
      "Probe file: <repo>/$PROBE_FILE" \
      "Min snapshots: $MIN_SNAPSHOTS" \
      "Max age (h): $MAX_SNAPSHOT_AGE_HOURS" \
      "Scope: list snapshots + SFTP get one small file from newest snapshot + sha256" \
      "Note: connection derived from borg-ui repo URL; no productive write."
    exit 0
    ;;
esac
# Preconditions, checked through helpers that are not defined in this file
# (presumably provided by the sourced common.sh - verify there).
for required_tool in docker date; do
  require_cmd "$required_tool"
done
require_borg_container

# --- Borg repo URL from the borg-ui database; user/host/port and the repo
# --- directory are derived from it afterwards.
repo="$(borg_repo_url)"
[ -n "$repo" ] || {
  echo "Could not resolve Borg repo URL from borg-ui database" >&2
  exit 1
}
# Split the Borg repo URL in $repo into ssh_user, ssh_host, ssh_port and
# repo_dir (the repo directory relative to the SFTP login home).
# Supported forms:
#   ssh://user@host[:port]/./reldir
#   ssh://user@host[:port]/~/reldir
#   user@host:reldir
# A URL without "user@" or without a path is rejected explicitly. Plain prefix
# stripping would otherwise produce ssh_user == ssh_host, or a repo_dir made
# of the host part, and both would pass the emptiness check below.
repo_path=""
proto_stripped="${repo#ssh://}"
if [ "$proto_stripped" != "$repo" ]; then
  # ssh:// form: everything up to the first "/" is user@host[:port].
  userhostport="${proto_stripped%%/*}"
  case "$proto_stripped" in
    */*) repo_path="/${proto_stripped#*/}" ;;
  esac
else
  # scp-like form: the first ":" separates user@host from the path.
  userhostport="${proto_stripped%%:*}"
  case "$proto_stripped" in
    *:*) repo_path="${proto_stripped#*:}" ;;
  esac
fi

ssh_user=""
hostport="$userhostport"
case "$userhostport" in
  *@*)
    ssh_user="${userhostport%%@*}"
    hostport="${userhostport#*@}"
    ;;
esac
ssh_host="${hostport%%:*}"
# No ":" in the host part means no explicit port -> default SSH port.
if [ "$hostport" = "$ssh_host" ]; then ssh_port=22; else ssh_port="${hostport##*:}"; fi

# Repo directory relative to the login home: drop a leading "/", then a
# leading "./" or "~/", then any "/" left over (covers "/./dir" and "/~/dir").
repo_dir="$repo_path"
repo_dir="${repo_dir#/}"; repo_dir="${repo_dir#[~.]/}"; repo_dir="${repo_dir#/}"

if [ -z "$ssh_user" ] || [ -z "$ssh_host" ] || [ -z "$repo_dir" ]; then
  echo "Could not parse user/host/repo-dir from repo URL: $repo" >&2
  exit 1
fi
# A non-numeric port would only surface later as a misleading SFTP failure.
case "$ssh_port" in
  '' | *[!0-9]*)
    echo "Could not parse a numeric SSH port from repo URL: $repo" >&2
    exit 1
    ;;
esac
#######################################
# Run one sftp batch session inside the borg-ui container.
# Globals:   BORG_CONTAINER, ssh_port, ssh_user, ssh_host, SNAPSHOT_SSH_OPTS (read)
# Inputs:    sftp batch commands on stdin (passed through via `-b -`)
# Outputs:   whatever sftp writes to stdout/stderr
# Returns:   the exit status of `docker exec`
#######################################
run_sftp() {
  local -a sftp_argv
  # SNAPSHOT_SSH_OPTS is a flat option string and is split into words on purpose.
  # shellcheck disable=SC2206
  sftp_argv=(sftp -q -P "$ssh_port" $SNAPSHOT_SSH_OPTS -b - "${ssh_user}@${ssh_host}")
  docker exec -i "$BORG_CONTAINER" "${sftp_argv[@]}"
}
# --- 1) List snapshots ----------------------------------------------------------
# Run the listing on its own and keep its exit status. A failed SFTP session
# (connection, auth, missing directory) is then reported as such, together with
# sftp's own output, instead of being mistaken for an empty snapshot directory.
listing_rc=0
raw_listing="$(printf 'ls -1 %s\nbye\n' "$SNAPSHOT_DIR" | run_sftp 2>&1)" || listing_rc=$?
if [ "$listing_rc" -ne 0 ]; then
  echo "SFTP listing of $SNAPSHOT_DIR on $ssh_host failed (exit $listing_rc):" >&2
  printf '%s\n' "$raw_listing" >&2
  exit 1
fi

# Keep only lines containing the snapshot name prefix, reduce them to their
# last path component and sort byte-wise so the order does not depend on the
# locale. `|| true`: grep exits 1 when nothing matches; that case is handled by
# the emptiness check right below rather than by errexit/pipefail.
snap_list="$(printf '%s\n' "$raw_listing" | tr -d '\r' \
  | grep -F -- "$SNAPSHOT_NAME_GLOB" | sed 's#.*/##' | LC_ALL=C sort -u || true)"
if [ -z "$snap_list" ]; then
  echo "No snapshots found in $SNAPSHOT_DIR on $ssh_host (glob: $SNAPSHOT_NAME_GLOB)" >&2
  exit 1
fi
snap_count="$(printf '%s\n' "$snap_list" | grep -c . || true)"
# The list is already sorted ascending, so the last entry is the newest name.
newest="$(printf '%s\n' "$snap_list" | tail -n 1)"

# --- 2) Derive the age of the newest snapshot from its name ---------------------
# Expected name format: Automatic-YYYY-MM-DDTHH-MM-SS
# age_hours stays "unknown" when the timestamp cannot be parsed by `date -d`.
# NOTE(review): `date -d` reads the timestamp in the local time zone of the
# machine running this script - confirm the snapshot names use the same zone.
age_hours="unknown"
ts="${newest#"$SNAPSHOT_NAME_GLOB"}" # quoted: strip the prefix literally, not as a pattern
date_part="${ts%%T*}"
time_part="${ts#*T}"
time_colons="$(printf '%s' "$time_part" | tr '-' ':')"
if snap_epoch="$(date -d "$date_part $time_colons" +%s 2>/dev/null)"; then
  now_epoch="$(date +%s)"
  age_hours="$(( (now_epoch - snap_epoch) / 3600 ))"
fi
# --- 3) Fetch one file from the newest snapshot and check it --------------------
# The probe file is downloaded via SFTP into a scratch directory inside the
# container. Its size and SHA256 are recorded and the scratch directory is
# removed again afterwards.
remote_probe="$SNAPSHOT_DIR/$newest/$repo_dir/$PROBE_FILE"

# Start from an empty scratch directory. The path is handed to `sh` as a
# positional parameter instead of being pasted into the command string, so
# quotes or spaces in PROBE_TMP_DIR cannot break or alter the command.
# No `-i` here or on the other non-sftp calls: these commands read no input,
# and an attached stdin could swallow input meant for the calling script.
docker exec "$BORG_CONTAINER" sh -c 'rm -rf -- "$1" && mkdir -p -- "$1"' sh "$PROBE_TMP_DIR"

probe_ok="no"
probe_size=0
probe_sha256="n/a"
# Paths are double-quoted in the batch line so that spaces survive sftp's
# argument splitting. sftp's output is discarded so it cannot end up in this
# script's own output; only the exit status matters here.
if printf 'get "%s" "%s/%s"\nbye\n' "$remote_probe" "$PROBE_TMP_DIR" "$PROBE_FILE" \
  | run_sftp >/dev/null 2>&1; then
  # `test -s`: the download only counts when the file exists and is non-empty.
  if docker exec "$BORG_CONTAINER" test -s "$PROBE_TMP_DIR/$PROBE_FILE"; then
    probe_ok="yes"
    probe_size="$(docker exec "$BORG_CONTAINER" stat -c '%s' "$PROBE_TMP_DIR/$PROBE_FILE" 2>/dev/null || echo 0)"
    probe_sha256="$(docker exec "$BORG_CONTAINER" sha256sum "$PROBE_TMP_DIR/$PROBE_FILE" 2>/dev/null | awk '{print $1}' || echo n/a)"
  fi
fi

# Remove the scratch directory in the container again (leave no data behind).
# Best effort: a cleanup failure must not change the test verdict.
docker exec "$BORG_CONTAINER" rm -rf -- "$PROBE_TMP_DIR" >/dev/null 2>&1 || true
# --- Verdict --------------------------------------------------------------------
# The first failing check wins, in this order: probe download, snapshot count,
# age of the newest snapshot. An age of "unknown" is not treated as a failure.
fail_reason=""
if [ "$probe_ok" != "yes" ]; then
  fail_reason="Einzeldatei-Restore aus Snapshot fehlgeschlagen ($remote_probe)"
elif [ "$snap_count" -lt "$MIN_SNAPSHOTS" ]; then
  fail_reason="Zu wenige Snapshots: $snap_count < $MIN_SNAPSHOTS"
else
  case "$age_hours" in
    unknown) ;;
    *)
      if [ "$age_hours" -gt "$MAX_SNAPSHOT_AGE_HOURS" ]; then
        fail_reason="Neuester Snapshot zu alt: ${age_hours}h > ${MAX_SNAPSHOT_AGE_HOURS}h"
      fi
      ;;
  esac
fi

# The overall result follows from whether any check recorded a reason.
if [ -n "$fail_reason" ]; then
  result="FAILED"
else
  result="SUCCESS"
fi
# --- Report ---------------------------------------------------------------------
# Hand the Markdown report to the write_report helper on stdin. The date and
# the optional failure line are computed first so the template stays plain.
report_date="$(date +%F)"
fail_line=""
if [ -n "$fail_reason" ]; then
  fail_line="- Fehlergrund: \`$fail_reason\`"
fi
write_report "$REPORT_FILE" <<EOF
# Hetzner Storage Box Snapshot Restore Test - $report_date
- Scope: \`Off-site Snapshot-Schutz (nicht append-only)\`
- Storage Box host: \`$ssh_host\`
- Borg repo dir: \`$repo_dir\`
- Snapshot dir: \`$SNAPSHOT_DIR\`
- Result: \`$result\`
## Checks
- Snapshots gefunden: \`$snap_count\` (min \`$MIN_SNAPSHOTS\`)
- Neuester Snapshot: \`$newest\`
- Alter neuester Snapshot: \`${age_hours}h\` (max \`${MAX_SNAPSHOT_AGE_HOURS}h\`)
- Probe-Datei: \`$repo_dir/$PROBE_FILE\`
- Einzeldatei-Restore aus Snapshot: \`$probe_ok\`
- Probe-Groesse: \`${probe_size} B\`
- Probe-SHA256: \`$probe_sha256\`
$fail_line
## Notes
- READ-ONLY: nur \`ls\` + \`get\` einer kleinen Datei via SFTP; kein Schreibzugriff,
kein borg prune/compact, keine produktiven Pfade.
- Verbindung aus der borg-ui-Repo-URL abgeleitet; Secrets/known_hosts bleiben im Container.
- Schutzmodell ist snapshot-basiert, append-only bewusst nicht (DECISIONS 2026-06-01/-11).
EOF
# Final status line: success goes to stdout; anything else goes to stderr and
# makes the script exit with status 1.
case "$result" in
  SUCCESS)
    echo "Hetzner snapshot restore test ok ($snap_count snapshots, newest $newest, probe ${probe_size}B) -> $REPORT_FILE"
    ;;
  *)
    echo "Hetzner snapshot restore test FAILED: $fail_reason -> $REPORT_FILE" >&2
    exit 1
    ;;
esac