3e9c12eb75
Make the off-site snapshot protection a repeatable, monitored proof (DECISIONS 2026-06-11/-23): a read-only restore-test that lists .zfs/snapshot on the Storage Box, checks retention and newest-snapshot age, and SFTP-fetches one small file from the newest snapshot (size + SHA256). Connection is derived from the borg-ui repo URL and runs via docker exec borg-ui; no secret in the script, no write access. Wired into the run-restore-checks.sh dispatcher; runbook documents the pending one-time live validation. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
188 lines
7.4 KiB
Bash
Executable File
188 lines
7.4 KiB
Bash
Executable File
#!/bin/bash
set -euo pipefail

# Hetzner Storage Box Snapshot Restore Test
#
# Proves that the off-site protection actually works. Append-only is
# deliberately NOT implemented (DECISIONS 2026-06-01); the protection is
# snapshot-based (DECISIONS 2026-06-11): the operational Borg credentials can
# still prune/compact, but the ZFS snapshots of the Storage Box cannot be
# deleted from the host side. This test makes the proof that was carried out
# manually on 2026-06-23 repeatable and monitorable.
#
# Scope (READ-ONLY against the Storage Box, via the borg-ui container):
#   1. list .zfs/snapshot/ -> determine count + newest snapshot (retention)
#   2. check the age of the newest snapshot from its name (Automatic-<ISO>)
#   3. fetch one small file (Borg repo `README`) from the newest snapshot via
#      SFTP into /tmp inside the container, check size + SHA256, then delete it
#   4. write a report to /mnt/user/backups/restore-reports/
#
# NO write access to the box, no borg prune/compact, no production paths.
#
# The connection is derived from the Borg repo URL configured in borg-ui
# (no secret in the script). SSH key + known_hosts already live in the borg-ui
# container and are used following the BORG_RSH convention.

# Resolve the directory this script lives in and load the shared helpers.
# BASH_SOURCE[0] (rather than $0) still points at this file if the script is
# ever sourced instead of executed.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=common.sh
. "$SCRIPT_DIR/common.sh"

# 1 when --what-if was given: only print what would be done, touch nothing.
WHATIF=0

#######################################
# Parse the command-line flags of this script.
# Globals:   WHATIF (written)
# Arguments: the script's positional parameters
# Outputs:   error message on stderr for an unknown argument
# Returns:   0 on success; exits 1 on an unknown argument
#######################################
parse_snapshot_test_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --what-if) WHATIF=1 ;;
      *)
        echo "Unknown argument: $arg" >&2
        exit 1
        ;;
    esac
  done
}

parse_snapshot_test_args "$@"

# Report destination; one Markdown file per calendar day.
REPORT_ROOT="/mnt/user/backups/restore-reports"
REPORT_FILE="$REPORT_ROOT/hetzner-snapshot-$(date +%F).md"

#######################################
# Abort unless a threshold is a plain non-negative integer. Without this a
# typo in the environment makes the later `[ ... -lt ... ]` comparison error
# out inside an `elif`, which silently skips the check.
# Arguments: $1 - variable name (for the message), $2 - its value
# Outputs:   error message on stderr
# Returns:   0 when valid; exits 1 otherwise
#######################################
require_non_negative_int() {
  case "$2" in
    '' | *[!0-9]*)
      echo "$1 must be a non-negative integer, got: '$2'" >&2
      exit 1
      ;;
  esac
}

# Expected values = target state per DECISIONS 2026-06-11 (daily, retention 7 days).
MIN_SNAPSHOTS="${MIN_SNAPSHOTS:-5}"
MAX_SNAPSHOT_AGE_HOURS="${MAX_SNAPSHOT_AGE_HOURS:-48}"
require_non_negative_int MIN_SNAPSHOTS "$MIN_SNAPSHOTS"
require_non_negative_int MAX_SNAPSHOT_AGE_HOURS "$MAX_SNAPSHOT_AGE_HOURS"

SNAPSHOT_DIR="${SNAPSHOT_DIR:-.zfs/snapshot}"
PROBE_FILE="${PROBE_FILE:-README}" # every Borg repo root has README + config
# Despite the name this is used as a literal name prefix, not a glob pattern.
SNAPSHOT_NAME_GLOB="${SNAPSHOT_NAME_GLOB:-Automatic-}"
# Additional ssh/sftp options; the default uses borg-ui's known_hosts like BORG_RSH.
SNAPSHOT_SSH_OPTS="${SNAPSHOT_SSH_OPTS:--o UserKnownHostsFile=/data/known_hosts -o StrictHostKeyChecking=yes -o BatchMode=yes}"
# Scratch directory inside the container; created and removed by the probe step.
PROBE_TMP_DIR="${PROBE_TMP_DIR:-/tmp/hetzner-snapshot-probe}"

#######################################
# Print the dry-run summary: what the test would do with the current settings.
# Globals:   BORG_CONTAINER, SNAPSHOT_DIR, PROBE_FILE, MIN_SNAPSHOTS,
#            MAX_SNAPSHOT_AGE_HOURS (all read)
# Outputs:   the summary on stdout
#######################################
print_whatif_summary() {
  cat <<EOF
Hetzner Storage Box snapshot restore test
Mode: WhatIf
Container: $BORG_CONTAINER
Snapshot dir (rel. login home): $SNAPSHOT_DIR
Probe file: <repo>/$PROBE_FILE
Min snapshots: $MIN_SNAPSHOTS
Max age (h): $MAX_SNAPSHOT_AGE_HOURS
Scope: list snapshots + SFTP get one small file from newest snapshot + sha256
Note: connection derived from borg-ui repo URL; no productive write.
EOF
}

# WhatIf mode stops here, before any container or network access.
if [ "${WHATIF:-0}" -eq 1 ]; then
  print_whatif_summary
  exit 0
fi

# Preflight: the host needs docker and date, and the borg-ui container must be
# usable. All three helpers are provided by common.sh.
# NOTE(review): presumably each helper aborts the script on failure; confirm
# against common.sh.
require_cmd docker
require_cmd date
require_borg_container

# --- Borg repo URL from the borg-ui DB; user/host/port + repo dir derive from it
# A failing lookup is folded into the empty case so that the diagnostic below
# is always printed instead of `set -e` aborting silently on the assignment.
repo="$(borg_repo_url)" || repo=""
if [ -z "$repo" ]; then
  echo "Could not resolve Borg repo URL from borg-ui database" >&2
  exit 1
fi

#######################################
# Split a Borg repo URL into its SSH connection parts.
# Supported forms: ssh://user@host[:port]/./reldir | user@host:reldir
# The repo directory is made relative to the login home by dropping a leading
# "/", "./" or "/./".
# Globals:   ssh_user, ssh_host, ssh_port, repo_dir (written; all left empty
#            when the URL is malformed)
# Arguments: $1 - repo URL
# Returns:   0 on success; 1 when the URL has no user, host, numeric port or
#            repo path (e.g. a local path or "ssh://user@host" without path)
#######################################
parse_borg_repo_url() {
  local url=$1 rest authority path hostport user host port

  ssh_user=""
  ssh_host=""
  ssh_port=""
  repo_dir=""

  rest="${url#ssh://}"
  if [ "$rest" != "$url" ]; then
    # URL form: everything up to the first "/" is user@host[:port].
    case "$rest" in
      */*) ;;
      *) return 1 ;; # no path component at all
    esac
    authority="${rest%%/*}"
    path="/${rest#*/}"
  else
    # scp form: the first ":" separates user@host from the path (no port).
    case "$rest" in
      *:*) ;;
      *) return 1 ;; # local path or bare host
    esac
    authority="${rest%%:*}"
    path="${rest#*:}"
  fi

  # sftp is invoked as user@host, so both parts are mandatory.
  case "$authority" in
    ?*@?*) ;;
    *) return 1 ;;
  esac
  user="${authority%%@*}"
  hostport="${authority#*@}"
  host="${hostport%%:*}"
  if [ "$hostport" = "$host" ]; then
    port=22
  else
    port="${hostport##*:}"
  fi
  case "$port" in
    '' | *[!0-9]*) return 1 ;;
  esac

  path="${path#/}"
  path="${path#./}"
  path="${path#/}"
  if [ -z "$host" ] || [ -z "$path" ]; then
    return 1
  fi

  ssh_user="$user"
  ssh_host="$host"
  ssh_port="$port"
  repo_dir="$path"
}

# A malformed URL leaves the outputs empty; the validation that follows
# reports it, so the return status is deliberately ignored here.
parse_borg_repo_url "$repo" || true

# Refuse to continue with an incomplete connection tuple: every later SFTP
# call depends on user, host, port and repo directory.
if [ -z "$ssh_user" ] || [ -z "$ssh_host" ] || [ -z "$repo_dir" ]; then
  echo "Could not parse user/host/repo-dir from repo URL: $repo" >&2
  exit 1
fi
# The port is handed to `sftp -P`; reject anything that is not a plain number.
case "$ssh_port" in
  '' | *[!0-9]*)
    echo "Invalid SSH port in repo URL: $repo" >&2
    exit 1
    ;;
esac

#######################################
# Run an sftp batch session against the Storage Box from inside the borg-ui
# container.
# Globals:   BORG_CONTAINER, SNAPSHOT_SSH_OPTS, ssh_port, ssh_user, ssh_host
#            (all read)
# Inputs:    sftp batch commands on stdin
# Outputs:   whatever sftp prints
# Returns:   exit status of `docker exec` / sftp
#######################################
run_sftp() {
  # Split the option string on whitespace into an array. Unlike an unquoted
  # expansion this does not apply pathname expansion to the words.
  local -a ssh_opts=()
  IFS=$' \t\n' read -r -d '' -a ssh_opts <<<"$SNAPSHOT_SSH_OPTS" || true

  # -i is required here: the batch commands arrive on stdin (-b -).
  docker exec -i "$BORG_CONTAINER" sftp -q -P "$ssh_port" \
    ${ssh_opts[@]+"${ssh_opts[@]}"} -b - "$ssh_user@$ssh_host"
}

# --- 1) List snapshots ----------------------------------------------------------

#######################################
# Reduce raw `sftp ls -1` output to a sorted, de-duplicated list of snapshot
# names.
# Arguments: $1 - literal text a snapshot line must contain
# Inputs:    raw listing on stdin (may contain CRs and directory prefixes)
# Outputs:   one snapshot name per line on stdout; nothing when none match
# Returns:   0, also when nothing matches
#######################################
filter_snapshot_names() {
  local needle=$1
  # `--` keeps a needle starting with "-" from being read as a grep option;
  # LC_ALL=C makes the ordering independent of the caller's locale.
  tr -d '\r' \
    | { grep -F -- "$needle" || true; } \
    | sed 's#.*/##' \
    | LC_ALL=C sort -u
}

# A failing SFTP session is reported on its own so it is not mistaken for an
# empty snapshot directory; snap_list stays empty and the check that follows
# aborts the run.
if raw_listing="$(printf 'ls -1 "%s"\nbye\n' "$SNAPSHOT_DIR" | run_sftp 2>/dev/null)"; then
  snap_list="$(printf '%s\n' "$raw_listing" | filter_snapshot_names "$SNAPSHOT_NAME_GLOB")"
else
  echo "SFTP listing of $SNAPSHOT_DIR on $ssh_host failed (connection, host key or missing directory)" >&2
  snap_list=""
fi

# Without at least one matching snapshot there is nothing to prove; abort
# before the age and probe steps.
if [ -z "$snap_list" ]; then
  echo "No snapshots found in $SNAPSHOT_DIR on $ssh_host (glob: $SNAPSHOT_NAME_GLOB)" >&2
  exit 1
fi

#######################################
# Derive the retention figures from a newline-separated snapshot list.
# The newest snapshot is the last name in byte order, which is chronological
# for the ISO-style timestamps in the names.
# Globals:   snap_count, newest (written)
# Arguments: $1 - newline-separated snapshot names
#######################################
summarize_snapshots() {
  # grep -c exits 1 on a zero count; that is not an error here.
  snap_count="$(printf '%s\n' "$1" | grep -c . || true)"
  newest="$(printf '%s\n' "$1" | LC_ALL=C sort | tail -n 1)"
}

summarize_snapshots "$snap_list"

# --- 2) Derive the age of the newest snapshot from its name ---------------------

#######################################
# Compute the age of a snapshot in whole hours from the timestamp in its name.
# Expected name format: <prefix>YYYY-MM-DDTHH-MM-SS
# The timestamp is interpreted by `date -d` in the script's local time zone.
# NOTE(review): confirm which time zone the Storage Box uses for the names.
# Arguments: $1 - snapshot name, $2 - literal name prefix to strip
# Outputs:   the age in hours, or "unknown" when the name cannot be parsed
# Returns:   0
#######################################
snapshot_age_hours() {
  local name=$1 prefix=$2
  local stamp day clock snap_epoch now_epoch

  # The inner quotes make the prefix literal; unquoted it would be a glob.
  stamp="${name#"$prefix"}"
  day="${stamp%%T*}"
  clock="${stamp#*T}"

  # No "T" separator, or an empty date/time part: `date -d " "` would
  # otherwise succeed and report today's midnight.
  if [ "$day" = "$stamp" ] || [ -z "$day" ] || [ -z "$clock" ]; then
    printf 'unknown\n'
    return 0
  fi

  # HH-MM-SS -> HH:MM:SS so that date can parse it.
  clock="${clock//-/:}"
  if snap_epoch="$(date -d "$day $clock" +%s 2>/dev/null)"; then
    now_epoch="$(date +%s)"
    printf '%s\n' "$(((now_epoch - snap_epoch) / 3600))"
  else
    printf 'unknown\n'
  fi
}

age_hours="$(snapshot_age_hours "$newest" "$SNAPSHOT_NAME_GLOB")"

# --- 3) Fetch a single file from the newest snapshot + verify it ----------------

#######################################
# Fetch the probe file from the newest snapshot into a scratch directory inside
# the borg-ui container, record its size and SHA256, then remove the scratch
# directory again so no data is left behind.
# Globals:   BORG_CONTAINER, SNAPSHOT_DIR, newest, repo_dir, PROBE_FILE,
#            PROBE_TMP_DIR (read)
#            remote_probe, probe_ok, probe_size, probe_sha256 (written)
# Returns:   0; a failed fetch is reported through probe_ok="no"
#######################################
fetch_snapshot_probe() {
  local local_probe="$PROBE_TMP_DIR/$PROBE_FILE"

  remote_probe="$SNAPSHOT_DIR/$newest/$repo_dir/$PROBE_FILE"
  probe_ok="no"
  probe_size=0
  probe_sha256="n/a"

  # The directory is passed as a positional argument instead of being spliced
  # into the command string, so quotes in the path cannot break the command.
  # No -i on these calls: they read nothing and must not consume the caller's
  # stdin.
  docker exec "$BORG_CONTAINER" sh -c 'rm -rf -- "$1" && mkdir -p -- "$1"' sh "$PROBE_TMP_DIR"

  # Paths are quoted for sftp so that spaces do not split them.
  if printf 'get "%s" "%s"\nbye\n' "$remote_probe" "$local_probe" | run_sftp 2>/dev/null; then
    # Only a non-empty file counts as a successful restore.
    if docker exec "$BORG_CONTAINER" test -s "$local_probe"; then
      probe_ok="yes"
      probe_size="$(docker exec "$BORG_CONTAINER" stat -c '%s' "$local_probe" 2>/dev/null || echo 0)"
      probe_sha256="$(docker exec "$BORG_CONTAINER" sha256sum "$local_probe" 2>/dev/null | awk '{print $1}' || echo n/a)"
    fi
  fi

  # Best-effort cleanup of the scratch directory in the container.
  docker exec "$BORG_CONTAINER" rm -rf -- "$PROBE_TMP_DIR" >/dev/null 2>&1 || true
}

fetch_snapshot_probe

# --- Evaluation -----------------------------------------------------------------

#######################################
# Turn the collected facts into a verdict. Only the first failing check is
# reported, in this order: probe restore, snapshot count, snapshot age.
# NOTE(review): an age of "unknown" (name could not be parsed) does NOT fail
# the run; confirm that this leniency is intended for a monitored check.
# Globals:   probe_ok, remote_probe, snap_count, MIN_SNAPSHOTS, age_hours,
#            MAX_SNAPSHOT_AGE_HOURS (read); result, fail_reason (written)
#######################################
evaluate_snapshot_checks() {
  result="SUCCESS"
  fail_reason=""

  if [ "$probe_ok" != "yes" ]; then
    result="FAILED"
    fail_reason="Einzeldatei-Restore aus Snapshot fehlgeschlagen ($remote_probe)"
  elif [ "$snap_count" -lt "$MIN_SNAPSHOTS" ]; then
    result="FAILED"
    fail_reason="Zu wenige Snapshots: $snap_count < $MIN_SNAPSHOTS"
  elif [ "$age_hours" != "unknown" ] && [ "$age_hours" -gt "$MAX_SNAPSHOT_AGE_HOURS" ]; then
    result="FAILED"
    fail_reason="Neuester Snapshot zu alt: ${age_hours}h > ${MAX_SNAPSHOT_AGE_HOURS}h"
  fi
}

evaluate_snapshot_checks

#######################################
# Render the Markdown report for this run.
# Globals:   ssh_host, repo_dir, SNAPSHOT_DIR, result, snap_count,
#            MIN_SNAPSHOTS, newest, age_hours, MAX_SNAPSHOT_AGE_HOURS,
#            PROBE_FILE, probe_ok, probe_size, probe_sha256, fail_reason (read)
# Outputs:   the report on stdout
#######################################
render_snapshot_report() {
  # The failure line is present only for a failed run; a successful run keeps
  # an empty line in its place.
  local fail_line=""
  if [ -n "$fail_reason" ]; then
    fail_line="- Fehlergrund: \`$fail_reason\`"
  fi

  cat <<EOF
# Hetzner Storage Box Snapshot Restore Test - $(date +%F)

- Scope: \`Off-site Snapshot-Schutz (nicht append-only)\`
- Storage Box host: \`$ssh_host\`
- Borg repo dir: \`$repo_dir\`
- Snapshot dir: \`$SNAPSHOT_DIR\`
- Result: \`$result\`

## Checks

- Snapshots gefunden: \`$snap_count\` (min \`$MIN_SNAPSHOTS\`)
- Neuester Snapshot: \`$newest\`
- Alter neuester Snapshot: \`${age_hours}h\` (max \`${MAX_SNAPSHOT_AGE_HOURS}h\`)
- Probe-Datei: \`$repo_dir/$PROBE_FILE\`
- Einzeldatei-Restore aus Snapshot: \`$probe_ok\`
- Probe-Groesse: \`${probe_size} B\`
- Probe-SHA256: \`$probe_sha256\`
$fail_line

## Notes

- READ-ONLY: nur \`ls\` + \`get\` einer kleinen Datei via SFTP; kein Schreibzugriff,
  kein borg prune/compact, keine produktiven Pfade.
- Verbindung aus der borg-ui-Repo-URL abgeleitet; Secrets/known_hosts bleiben im Container.
- Schutzmodell ist snapshot-basiert, append-only bewusst nicht (DECISIONS 2026-06-01/-11).
EOF
}

# write_report (from common.sh) receives the report text on stdin. Process
# substitution keeps it running in the current shell, as with a here-document.
write_report "$REPORT_FILE" < <(render_snapshot_report)

# Propagate the verdict to the caller: a non-zero exit on any failed check,
# with the reason and the report location on stderr.
if [ "$result" != "SUCCESS" ]; then
  echo "Hetzner snapshot restore test FAILED: $fail_reason -> $REPORT_FILE" >&2
  exit 1
fi

echo "Hetzner snapshot restore test ok ($snap_count snapshots, newest $newest, probe ${probe_size}B) -> $REPORT_FILE"