d1f9491b24
Kompletter Restore-Drill fuer den Shared-PostgreSQL-18-Cluster: globals (Rollen) + 5 per-DB Custom-Format-Dumps (paperless, mailarchiver, authelia, nextcloud, mealie). Bekannter mailarchiver-Bootstrap-Rollenkonflikt wird toleriert. Authelia/Nextcloud/Mealie-Dumps als optional markiert. Tabellen-Count pro DB als fachlicher Sanity-Check. Machbarkeit vorab verifiziert: alle Dumps auf Host vorhanden, pg_restore im postgres:18.4-Image verfuegbar, Postgres auf shfs bewiesen durch bestehende Tests. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
240 lines
7.9 KiB
Bash
240 lines
7.9 KiB
Bash
#!/bin/bash
set -euo pipefail

# Shared PostgreSQL 18 cluster restore drill
#
# Proves that the complete shared Postgres cluster can be rebuilt from the
# dump artifacts:
#   1. Globals (roles) from pg_dumpall --globals-only
#   2. Per-database custom-format dumps: paperless, mailarchiver, authelia,
#      nextcloud, mealie
#
# Known special case (docs/RESTORE_MATRIX.md):
#   - CREATE ROLE mailarchiver fails because that user is at the same time
#     the dump admin user. The ALTER ROLE that follows must still succeed.
#     The drill tolerates this specific error.
#
# Production PostgreSQL containers and data paths are NOT touched.

# Resolve the directory this script lives in and load the shared helpers.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/common.sh"

# Command-line switches:
#   --what-if    print the planned steps and exit without doing anything
#   --keep-data  keep the restore lab directory after a successful run
WHATIF=0
KEEP_DATA=0
for arg in "$@"; do
  if [[ "$arg" == "--what-if" ]]; then
    WHATIF=1
  elif [[ "$arg" == "--keep-data" ]]; then
    KEEP_DATA=1
  else
    echo "Unknown argument: $arg" >&2
    exit 1
  fi
done
|
|
|
|
# Where the dump artifacts are read from.
DUMP_ROOT="/mnt/user/backups/borg/dumps/latest"

# Scratch area for the throw-away cluster and the directory for reports.
RESTORE_ROOT="/mnt/user/backups/restore-lab/shared-pg-cluster"
REPORT_ROOT="/mnt/user/backups/restore-reports"
REPORT_FILE="${REPORT_ROOT}/shared-pg-cluster-$(date +%F).md"

# Compose project that defines the disposable test container.
COMPOSE_FILE="${SCRIPT_DIR}/shared-pg-cluster-compose.test.yml"

# All expected dumps: one globals file plus one archive per database.
GLOBALS_DUMP="${DUMP_ROOT}/postgresql17-globals.sql"
PAPERLESS_DUMP="${DUMP_ROOT}/postgresql17-paperless.dump"
MAILARCHIVER_DUMP="${DUMP_ROOT}/postgresql17-mailarchiver.dump"
AUTHELIA_DUMP="${DUMP_ROOT}/postgresql17-authelia.dump"
NEXTCLOUD_DUMP="${DUMP_ROOT}/nextcloud.dump"
MEALIE_DUMP="${DUMP_ROOT}/mealie.dump"
|
|
|
|
# Print the dry-run plan: the paths in use and the steps a real run performs.
print_whatif_plan() {
  cat <<EOF
Shared PostgreSQL 18 Cluster Restore Drill
Mode: WhatIf
RestoreRoot: $RESTORE_ROOT
Dumps from: $DUMP_ROOT
Steps:
1. Frisches postgres:18.4 mit Superuser hochfahren
2. Globals einspielen (pg_dumpall --globals-only)
-> bekannter mailarchiver-Rollenkonflikt wird toleriert
3. DBs anlegen: paperless, mailarchiver, authelia, nextcloud, mealie
4. Per-DB pg_restore fuer jede DB
5. Tabellen-Count pro DB als Sanity-Check
6. Report schreiben
EOF
}

# In what-if mode nothing is started or modified; show the plan and stop.
if ((WHATIF == 1)); then
  print_whatif_plan
  exit 0
fi
|
|
|
|
# Preflight: docker must be available, and the compose file plus the
# mandatory dumps must exist before anything is started.
require_cmd docker
for required_path in \
  "$COMPOSE_FILE" \
  "$GLOBALS_DUMP" \
  "$PAPERLESS_DUMP" \
  "$MAILARCHIVER_DUMP"; do
  require_path "$required_path"
done

# The authelia/nextcloud/mealie dumps are optional (they may be missing),
# so they are deliberately not checked here.
OPTIONAL_DUMPS=""
|
|
|
|
# Flipped to 1 only after the report has been written.
RESTORE_SUCCESS=0

#######################################
# EXIT handler: tear down the test container and dispose of the lab data.
# Globals:
#   COMPOSE_FILE, RESTORE_ROOT, RESTORE_SUCCESS, KEEP_DATA (all read)
# Behaviour:
#   - The compose project is always brought down, including its volumes;
#     errors from that step are ignored.
#   - After a failed run the lab directory is handed to preserve_on_failure.
#   - After a successful run the lab directory is deleted unless
#     --keep-data was given.
#######################################
cleanup() {
  docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true

  if [ "$RESTORE_SUCCESS" -eq 1 ]; then
    if [ "$KEEP_DATA" -ne 1 ]; then
      rm -rf "$RESTORE_ROOT"
    fi
  else
    preserve_on_failure "shared-pg-cluster" "$RESTORE_ROOT"
  fi
}

trap cleanup EXIT
|
|
|
|
# Start from an empty lab directory. The :? guard aborts instead of running
# rm -rf on an empty path should RESTORE_ROOT ever be blank.
rm -rf -- "${RESTORE_ROOT:?}"
mkdir -p "$RESTORE_ROOT/data"

# Stage 1: start the test Postgres and wait until it accepts connections.
docker compose -f "$COMPOSE_FILE" up -d restoretest-shared-pg >/dev/null

# Bounded wait (90 tries x 2 s): a container that never becomes ready must
# fail the drill instead of hanging it forever.
pg_ready=0
for ((pg_wait_try = 1; pg_wait_try <= 90; pg_wait_try++)); do
  if docker exec restoretest-shared-pg pg_isready -U postgres >/dev/null 2>&1; then
    pg_ready=1
    break
  fi
  sleep 2
done
if [ "$pg_ready" -ne 1 ]; then
  echo "restoretest-shared-pg did not become ready within 180 seconds" >&2
  exit 1
fi
# Extra grace period for the entrypoint init.
sleep 3

# Stage 2: import the globals.
# The globals dump contains CREATE ROLE for all database users. The known
# conflict is that CREATE ROLE mailarchiver can fail when that user is also
# the dump admin. SQL-level errors are therefore tolerated: psql runs without
# ON_ERROR_STOP, so they do not change its exit status. The stage fails when
# psql itself exits non-zero (for example because it could not connect) or
# when the log contains FATAL/PANIC.
globals_status="ok"
globals_rc=0
docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser restoretest-shared-pg \
  psql -U postgres -f - <"$GLOBALS_DUMP" >/tmp/shared-pg-globals.log 2>&1 || globals_rc=$?

if [ "$globals_rc" -ne 0 ]; then
  globals_status="failed (psql exit $globals_rc)"
  cat /tmp/shared-pg-globals.log >&2
  exit 1
fi
if grep -qiE "FATAL|PANIC" /tmp/shared-pg-globals.log; then
  globals_status="failed (FATAL/PANIC)"
  cat /tmp/shared-pg-globals.log >&2
  exit 1
fi
|
|
|
|
# Stage 3: create the databases and restore the per-database dumps.
# Per-database results keyed by database name; both are read by the report.
declare -A DB_STATUS
declare -A TABLE_COUNTS

#######################################
# Restore one custom-format dump into the disposable test cluster.
# Globals:
#   DB_STATUS    (written) restore outcome for the database
#   TABLE_COUNTS (written) number of tables in schema "public", or "n/a"
# Arguments:
#   $1 - database name to create and restore into
#   $2 - role that should own the database (created if it does not exist)
#   $3 - host path of the pg_dump custom-format archive
#   $4 - "yes" if a missing dump is acceptable (default: "no")
# Outputs:
#   pg_restore's stderr is kept in /tmp/shared-pg-restore-<db>.err and is
#   copied to stderr on a hard failure.
# Returns:
#   0 when the database was restored or an optional dump was skipped;
#   1 when a mandatory dump is missing, pg_restore failed hard, or the
#   server was still unreachable after all retries.
#######################################
restore_db() {
  local dbname="$1"
  local dbuser="$2"
  local dump_path="$3"
  local optional="${4:-no}"

  local -r max_attempts=5
  local err_file="/tmp/shared-pg-restore-${dbname}.err"
  local attempt restore_ok=0 table_count
  # Common prefix for every command run inside the test container.
  local -a pg_exec=(
    docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser
    restoretest-shared-pg
  )

  if [ ! -f "$dump_path" ]; then
    TABLE_COUNTS["$dbname"]="n/a"
    if [ "$optional" = "yes" ]; then
      DB_STATUS["$dbname"]="skipped (dump missing)"
      return 0
    fi
    DB_STATUS["$dbname"]="failed (dump missing)"
    return 1
  fi

  # Create the role in case the globals import did not (idempotent).
  # Best effort on purpose: a real problem surfaces in pg_restore below.
  "${pg_exec[@]}" \
    psql -U postgres -c "DO \$\$ BEGIN CREATE ROLE $dbuser WITH LOGIN PASSWORD 'restoretest-$dbuser'; EXCEPTION WHEN duplicate_object THEN NULL; END \$\$;" >/dev/null 2>&1 || true

  # Create the database unless it already exists. Also best effort: if the
  # database is missing afterwards, pg_restore cannot connect and fails hard.
  if ! "${pg_exec[@]}" \
    psql -U postgres -c "SELECT 1 FROM pg_database WHERE datname='$dbname'" 2>/dev/null |
    grep -q 1; then
    "${pg_exec[@]}" createdb -U postgres -O "$dbuser" "$dbname" 2>/dev/null || true
  fi

  # pg_restore with retry while the server is still starting or restarting.
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if "${pg_exec[@]}" \
      pg_restore -U postgres -d "$dbname" --clean --if-exists --no-owner --no-privileges \
      <"$dump_path" 2>"$err_file"; then
      restore_ok=1
      break
    fi

    # Transient: the server is not accepting connections yet.
    if grep -qiE "starting up|shutting down|connection refused|is the server running" "$err_file"; then
      sleep 5
      continue
    fi

    # Tolerated: pg_restore worked through the whole archive and only
    # reports per-object errors it skipped (e.g. objects that --clean could
    # not drop on a first import). In that case it ends with an
    # "errors ignored on restore" summary.
    if grep -qi "errors ignored on restore" "$err_file" &&
      ! grep -qiE "FATAL|PANIC" "$err_file"; then
      restore_ok=1
      break
    fi

    # Anything else (FATAL/PANIC, unreadable archive, connection problem) is
    # a hard failure and must not be reported as a successful restore.
    DB_STATUS["$dbname"]="failed"
    TABLE_COUNTS["$dbname"]="n/a"
    cat "$err_file" >&2
    return 1
  done

  if [ "$restore_ok" -ne 1 ]; then
    DB_STATUS["$dbname"]="failed (timeout)"
    TABLE_COUNTS["$dbname"]="n/a"
    return 1
  fi

  DB_STATUS["$dbname"]="ok"

  # Count the tables in schema "public"; fall back to "n/a" when the query
  # fails or returns nothing.
  table_count="$("${pg_exec[@]}" \
    psql -U postgres -d "$dbname" -tAc \
    "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';" \
    2>/dev/null | tr -d '[:space:]')" || table_count=""
  TABLE_COUNTS["$dbname"]="${table_count:-n/a}"
}
|
|
|
|
# Mandatory databases: a missing or broken dump fails the drill.
restore_db "paperless" "paperless" "$PAPERLESS_DUMP"
restore_db "mailarchiver" "mailarchiver" "$MAILARCHIVER_DUMP"

# Optional databases: a missing dump is recorded as skipped. Role name and
# database name are identical for each of them.
declare -A optional_dump_for=(
  [authelia]="$AUTHELIA_DUMP"
  [nextcloud]="$NEXTCLOUD_DUMP"
  [mealie]="$MEALIE_DUMP"
)
for optional_db in authelia nextcloud mealie; do
  restore_db "$optional_db" "$optional_db" "${optional_dump_for[$optional_db]}" "yes"
done
|
|
|
|
# Stage 4: record the data_checksums setting of the test cluster.
# The value is only reported, not asserted. A failed or empty query yields
# "n/a" instead of a partial value.
checksums="$(docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser restoretest-shared-pg \
  psql -U postgres -tAc "SHOW data_checksums;" 2>/dev/null | tr -d '[:space:]')" || checksums=""
checksums="${checksums:-n/a}"

# Stage 5: list all non-template databases as "a, b, c".
# paste joins the lines with commas and sed adds the space after each one.
# (tr cannot do this: it maps single characters, so a two-character
# replacement such as ', ' loses its space.)
db_list="$(docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser restoretest-shared-pg \
  psql -U postgres -tAc "SELECT datname FROM pg_database WHERE NOT datistemplate ORDER BY datname;" \
  2>/dev/null | paste -sd, - | sed 's/,/, /g')" || db_list=""
db_list="${db_list:-n/a}"
|
|
|
|
# Build and write the report.
# Wording for the final note: whether the lab data is kept or cleaned up.
if [ "$KEEP_DATA" -eq 1 ]; then
  data_disposition="behalten"
else
  data_disposition="bereinigt"
fi

# The markdown is streamed straight into write_report. The empty line before
# the terminator is intentional: it keeps the trailing blank line of the
# report.
write_report "$REPORT_FILE" <<EOF
# Shared PostgreSQL 18 Cluster Restore Drill - $(date +%F)

- Dump source: \`$DUMP_ROOT\`
- Restore root: \`$RESTORE_ROOT\`
- Result: \`SUCCESS\`

## Checks

- Test-Postgres healthy: \`ok\`
- Globals import: \`$globals_status\`
- data_checksums: \`$checksums\`
- Databases: \`$db_list\`

## Per-DB Restore

| Database | Restore | Tables |
|---|---|---|
| paperless | \`${DB_STATUS[paperless]}\` | \`${TABLE_COUNTS[paperless]}\` |
| mailarchiver | \`${DB_STATUS[mailarchiver]}\` | \`${TABLE_COUNTS[mailarchiver]}\` |
| authelia | \`${DB_STATUS[authelia]}\` | \`${TABLE_COUNTS[authelia]}\` |
| nextcloud | \`${DB_STATUS[nextcloud]}\` | \`${TABLE_COUNTS[nextcloud]}\` |
| mealie | \`${DB_STATUS[mealie]}\` | \`${TABLE_COUNTS[mealie]}\` |

## Scope

Dieser Drill beweist, dass der gesamte Shared-PostgreSQL-18-Cluster aus
den taeglichen Dump-Artefakten wiederhergestellt werden kann: Globals
(Rollen) + per-DB Custom-Format-Dumps. Der bekannte mailarchiver-
Bootstrap-Rollenkonflikt wird toleriert.

## Notes

- Produktive PostgreSQL-Container und -Datenpfade wurden nicht beruehrt.
- Test-Postgres nutzt Wegwerf-Superuser-Passwort.
- Test-Daten wurden \`$data_disposition\`.

EOF

# Only now is the run considered successful; the EXIT handler reads this.
RESTORE_SUCCESS=1
echo "Shared PG cluster restore drill ok -> $REPORT_FILE"
|