Files
homelab-infra/ops/restore-tests/check-restore-freshness.sh
T
Micha 5e52316fab fix(restore): freshness check pg_header_ok returns 1 on corrupt dump
Negativ-Test 2026-06-03: korrupter mealie.dump wurde nicht erkannt,
weil der Docker-Fallback-Pfad nach gescheitertem pg_restore --list
zu return 2 (unchecked) durchfiel statt return 1 (invalid).

Fix: explizites if/else statt &&-Kette, damit fehlgeschlagene
Header-Validierung return 1 liefert und als DUMP_HEADER_INVALID
in den Critical-Zaehler geht.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 13:47:08 +02:00

177 lines
4.5 KiB
Bash
Executable File

#!/bin/bash
# Restore freshness check: inspects backup dumps under DUMP_ROOT and restore
# reports under REPORT_ROOT, then prints a Markdown summary.
set -euo pipefail

# Tunables; each one keeps a non-empty value inherited from the environment
# and otherwise falls back to the default given here.
: "${DUMP_ROOT:=/mnt/user/backups/borg/dumps/latest}"
: "${REPORT_ROOT:=/mnt/user/backups/restore-reports}"
: "${MAX_DUMP_AGE_HOURS:=26}"
: "${MAX_REPORT_AGE_DAYS:=45}"

# One reference instant so every age is measured against the same clock value.
now_epoch=$(date +%s)

# Findings collected during the run, grouped by severity.
declare -a critical=() warnings=() info=()
# Print the age of a file in whole hours (integer division), measured from
# the file's mtime to the global now_epoch.
# Globals:   now_epoch (read)
# Arguments: $1 - path of the file to inspect
# Outputs:   the hour count on stdout
check_file_age_hours() {
  local file="$1"
  local modified_at age_seconds
  modified_at="$(stat -c %Y "$file")"
  age_seconds=$(( now_epoch - modified_at ))
  printf '%s\n' "$(( age_seconds / 3600 ))"
}
# Print the age of a file in whole days (integer division), measured from
# the file's mtime to the global now_epoch.
# Globals:   now_epoch (read)
# Arguments: $1 - path of the file to inspect
# Outputs:   the day count on stdout
check_file_age_days() {
  local file="$1"
  local modified_at age_seconds
  modified_at="$(stat -c %Y "$file")"
  age_seconds=$(( now_epoch - modified_at ))
  printf '%s\n' "$(( age_seconds / 86400 ))"
}
# `pg_restore --list` serves as a cheap header check for custom-format dumps;
# it catches corruption that a plain "exists and non-empty" test lets slip
# through. No running Postgres is required; the check only reads the TOC
# section.
PG_DUMPS="postgresql17-paperless.dump postgresql17-mailarchiver.dump postgresql17-authelia.dump mealie.dump immich.dump nextcloud.dump"

# Return 0 when $1 is exactly one of the space-separated names in PG_DUMPS,
# otherwise return 1.
is_pg_custom_dump() {
  # Padding both sides with spaces makes this a whole-word match.
  if [[ " $PG_DUMPS " == *" $1 "* ]]; then
    return 0
  fi
  return 1
}
# Validate the header/TOC of a custom-format dump via `pg_restore --list`.
# Uses the host's pg_restore when one is available; otherwise falls back to
# running pg_restore inside the postgresql17 Docker container with the dump
# fed on stdin.
# Arguments: $1 - path to the dump file
# Returns:   0 - header is valid
#            1 - pg_restore rejected the dump (header corrupt)
#            2 - not checkable (no host pg_restore, no running container)
pg_header_ok() {
  local path="$1"

  if command -v pg_restore >/dev/null 2>&1; then
    # Collapse every non-zero pg_restore status to 1 so callers only ever
    # see the documented 0/1/2 contract (a raw status of 2 would otherwise
    # be mistaken for "not checkable").
    if pg_restore --list "$path" >/dev/null 2>&1; then
      return 0
    fi
    return 1
  fi

  # Without a host pg_restore: try inside the Postgres container, but only
  # when it is actually running. A plain `docker inspect` also succeeds for
  # a stopped container, in which case `docker exec` fails and a healthy
  # dump would be reported as corrupt.
  if command -v docker >/dev/null 2>&1; then
    local running
    running="$(docker inspect -f '{{.State.Running}}' postgresql17 2>/dev/null)" || running=""
    if [ "$running" = "true" ]; then
      if docker exec -i postgresql17 pg_restore --list < "$path" >/dev/null 2>&1; then
        return 0 # header valid
      fi
      return 1 # header corrupt
    fi
  fi

  return 2 # not checkable (no pg_restore, no running container)
}
# Classify one Postgres custom-format dump and record the finding in the
# global critical/warnings/info arrays.
# Checks, in order: file exists, file is non-empty, age is within
# MAX_DUMP_AGE_HOURS, and finally the dump header via pg_header_ok.
# Globals:   MAX_DUMP_AGE_HOURS (read); critical, warnings, info (appended)
# Arguments: $1 - dump name used in the report entries
#            $2 - path to the dump file
#            $3 - age of the dump in hours
#            $4 - "optional" downgrades missing to info and stale to warning;
#                 anything else (default "critical") reports both as critical
check_pg_header() {
  local dump="$1"
  local path="$2"
  local age="$3"
  local missing_mode="${4:-critical}"
  # Kept local so the status does not leak into the caller's scope.
  local rc=0

  if [ ! -f "$path" ]; then
    if [ "$missing_mode" = "optional" ]; then
      info+=("DUMP_OPTIONAL_MISSING $dump")
    else
      critical+=("DUMP_MISSING $dump")
    fi
    return 0
  fi

  if [ ! -s "$path" ]; then
    critical+=("DUMP_EMPTY $dump")
    return 0
  fi

  if [ "$age" -gt "$MAX_DUMP_AGE_HOURS" ]; then
    if [ "$missing_mode" = "optional" ]; then
      warnings+=("DUMP_OPTIONAL_STALE $dump age=${age}h")
    else
      critical+=("DUMP_STALE $dump age=${age}h")
    fi
    return 0
  fi

  # Capture the status without tripping `set -e`.
  pg_header_ok "$path" || rc=$?

  case "$rc" in
    0) info+=("DUMP_OK $dump age=${age}h header=ok") ;;
    1) critical+=("DUMP_HEADER_INVALID $dump (pg_restore --list failed)") ;;
    2) info+=("DUMP_OK $dump age=${age}h header=unchecked") ;;
    # Any other status is still a failed header check; count it as critical
    # rather than silently dropping the dump from the report.
    *) critical+=("DUMP_HEADER_INVALID $dump (pg_restore --list failed, rc=$rc)") ;;
  esac
}
# Mandatory dumps: each one must exist, be non-empty and be no older than
# MAX_DUMP_AGE_HOURS. Postgres custom-format dumps additionally get a header
# check; every other dump is accepted once those three checks pass.
required_dumps=(
  postgresql17-paperless.dump
  postgresql17-mailarchiver.dump
  mealie.dump
  immich.dump
  nextcloud.dump
  gitea.sqlite.dump
  vaultwarden.sqlite.dump
  speedtest-tracker.sqlite.dump
  filebrowser.bolt.dump
  unraid-flash-config.tar.gz
)
for dump in "${required_dumps[@]}"; do
  path="${DUMP_ROOT}/${dump}"
  if [ ! -f "$path" ]; then
    critical+=("DUMP_MISSING $dump")
  elif [ ! -s "$path" ]; then
    critical+=("DUMP_EMPTY $dump")
  else
    age="$(check_file_age_hours "$path")"
    if [ "$age" -gt "$MAX_DUMP_AGE_HOURS" ]; then
      critical+=("DUMP_STALE $dump age=${age}h")
    elif is_pg_custom_dump "$dump"; then
      check_pg_header "$dump" "$path" "$age"
    else
      info+=("DUMP_OK $dump age=${age}h")
    fi
  fi
done
# The Authelia dump is optional: it goes through check_pg_header in
# "optional" mode. Its age is only computed when the file exists; otherwise
# a placeholder age of 0 is passed along.
optional_dump="postgresql17-authelia.dump"
optional_path="${DUMP_ROOT}/${optional_dump}"
if [ -f "$optional_path" ]; then
  optional_age="$(check_file_age_hours "$optional_path")"
else
  optional_age=0
fi
check_pg_header "$optional_dump" "$optional_path" "$optional_age" optional
# Restore reports: for each service, pick the "<service>-*.md" file directly
# under REPORT_ROOT whose name sorts last and compare its age against
# MAX_REPORT_AGE_DAYS. Report problems are warnings only, never critical.
for service in vaultwarden gitea paperless; do
  if ! [ -d "$REPORT_ROOT" ]; then
    # Without the report directory nothing can be checked; warn once and stop.
    warnings+=("REPORT_ROOT_MISSING $REPORT_ROOT")
    break
  fi

  # `|| true` keeps a failing pipeline stage from aborting the script.
  latest="$(find "$REPORT_ROOT" -maxdepth 1 -type f -name "$service-*.md" | sort | tail -n 1 || true)"

  if [ -n "$latest" ]; then
    age="$(check_file_age_days "$latest")"
    if [ "$age" -gt "$MAX_REPORT_AGE_DAYS" ]; then
      warnings+=("REPORT_STALE $service age=${age}d file=$(basename "$latest")")
    else
      info+=("REPORT_OK $service age=${age}d file=$(basename "$latest")")
    fi
  else
    warnings+=("REPORT_MISSING $service")
  fi
done
# Emit the Markdown report: a header with per-severity counts, followed by
# one bullet list for each severity that has at least one entry.
printf '%s\n' "# Restore Freshness Check" ""
printf 'Timestamp: %s\n' "$(date '+%F %T')"
printf 'Critical: %s\n' "${#critical[@]}"
printf 'Warnings: %s\n' "${#warnings[@]}"
printf 'Info: %s\n' "${#info[@]}"
printf '\n'

if [ "${#critical[@]}" -ne 0 ]; then
  printf '%s\n' "## Critical"
  printf -- '- %s\n' "${critical[@]}"
  printf '\n'
fi

if [ "${#warnings[@]}" -ne 0 ]; then
  printf '%s\n' "## Warnings"
  printf -- '- %s\n' "${warnings[@]}"
  printf '\n'
fi

if [ "${#info[@]}" -ne 0 ]; then
  printf '%s\n' "## Info"
  printf -- '- %s\n' "${info[@]}"
fi

# Last command sets the exit status: 0 only when nothing critical was found.
test "${#critical[@]}" -eq 0