feat(restore): shared postgresql 18 cluster restore drill
Kompletter Restore-Drill fuer den Shared-PostgreSQL-18-Cluster: globals (Rollen) + 5 per-DB Custom-Format-Dumps (paperless, mailarchiver, authelia, nextcloud, mealie). Bekannter mailarchiver-Bootstrap-Rollenkonflikt wird toleriert. Authelia/Nextcloud/Mealie-Dumps als optional markiert. Tabellen-Count pro DB als fachlicher Sanity-Check. Machbarkeit vorab verifiziert: alle Dumps auf Host vorhanden, pg_restore im postgres:18.4-Image verfuegbar, Postgres auf shfs bewiesen durch bestehende Tests. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -58,8 +58,14 @@ case "$MODE" in
|
||||
fi
|
||||
exec "$SCRIPT_DIR/komodo-mongo-restore-test.sh"
|
||||
;;
|
||||
shared-pg-cluster)
|
||||
if [ "$WHATIF" = "--what-if" ]; then
|
||||
exec "$SCRIPT_DIR/shared-pg-cluster-restore-test.sh" --what-if
|
||||
fi
|
||||
exec "$SCRIPT_DIR/shared-pg-cluster-restore-test.sh"
|
||||
;;
|
||||
*)
|
||||
echo "Usage: $0 {freshness|vaultwarden|gitea|paperless|immich|authelia|nextcloud|komodo-bootstrap|komodo-mongo-restore} [--what-if]" >&2
|
||||
echo "Usage: $0 {freshness|vaultwarden|gitea|paperless|immich|authelia|nextcloud|komodo-bootstrap|komodo-mongo-restore|shared-pg-cluster} [--what-if]" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
# Throwaway PostgreSQL instance for the shared-pg-cluster restore drill.
# NOTE(review): presumably this is the file the drill script loads as
# shared-pg-cluster-compose.test.yml - confirm the file name.
services:
  restoretest-shared-pg:
    container_name: restoretest-shared-pg
    # Image is pinned by tag and digest.
    image: postgres:18.4@sha256:8ff36f3c66371cba71d20ceedccfc3de9669a68737607888c4ef0af93abe8e39
    # Quoted on purpose: an unquoted no would be parsed as a boolean.
    restart: "no"
    security_opt:
      - no-new-privileges:true
    environment:
      TZ: Europe/Berlin
      POSTGRES_USER: postgres
      # Disposable password; the drill script passes the same literal as PGPASSWORD.
      POSTGRES_PASSWORD: restoretest-shared-pg-superuser
      # Data directory lives below the mounted /var/lib/postgresql volume.
      PGDATA: /var/lib/postgresql/18/docker
    volumes:
      - /mnt/user/backups/restore-lab/shared-pg-cluster/data:/var/lib/postgresql
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U postgres
      interval: 10s
      timeout: 5s
      retries: 10
|
||||
@@ -0,0 +1,239 @@
|
||||
#!/bin/bash
set -euo pipefail

# Shared PostgreSQL 18 cluster restore drill
#
# Demonstrates that the complete shared Postgres cluster can be rebuilt from
# the dump artifacts:
#   1. Globals (roles) from pg_dumpall --globals-only
#   2. Per-database custom-format dumps: paperless, mailarchiver, authelia,
#      nextcloud, mealie
#
# Known special case (docs/RESTORE_MATRIX.md):
#   - CREATE ROLE mailarchiver fails because that user is also the dump admin
#     user. The ALTER ROLE that follows must still go through.
#     The test tolerates this specific error.
#
# Production PostgreSQL containers and data paths are NOT touched.
#
# Options:
#   --what-if    print the planned steps and exit
#   --keep-data  keep the restore directory after a successful run

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Shared helpers; this script calls require_cmd, require_path,
# preserve_on_failure and write_report, none of which are defined here.
source "$SCRIPT_DIR/common.sh"

# Command-line flags (0 = off, 1 = on).
WHATIF=0
KEEP_DATA=0
for arg; do
  if [ "$arg" = "--what-if" ]; then
    WHATIF=1
  elif [ "$arg" = "--keep-data" ]; then
    KEEP_DATA=1
  else
    echo "Unknown argument: $arg" >&2
    exit 1
  fi
done

# Working directory of the drill, report destination and compose file.
RESTORE_ROOT="/mnt/user/backups/restore-lab/shared-pg-cluster"
COMPOSE_FILE="${SCRIPT_DIR}/shared-pg-cluster-compose.test.yml"
REPORT_ROOT="/mnt/user/backups/restore-reports"
REPORT_FILE="${REPORT_ROOT}/shared-pg-cluster-$(date +%F).md"

# All expected dumps.
# NOTE(review): several file names carry a "postgresql17-" prefix although the
# drill targets PostgreSQL 18 - presumably historical naming; confirm.
DUMP_ROOT="/mnt/user/backups/borg/dumps/latest"
GLOBALS_DUMP="${DUMP_ROOT}/postgresql17-globals.sql"
PAPERLESS_DUMP="${DUMP_ROOT}/postgresql17-paperless.dump"
MAILARCHIVER_DUMP="${DUMP_ROOT}/postgresql17-mailarchiver.dump"
AUTHELIA_DUMP="${DUMP_ROOT}/postgresql17-authelia.dump"
NEXTCLOUD_DUMP="${DUMP_ROOT}/nextcloud.dump"
MEALIE_DUMP="${DUMP_ROOT}/mealie.dump"
|
||||
|
||||
# Dry run: print the plan and leave without touching Docker or the disk.
if [ "$WHATIF" -eq 1 ]; then
  printf '%s\n' \
    "Shared PostgreSQL 18 Cluster Restore Drill" \
    "Mode: WhatIf" \
    "RestoreRoot: $RESTORE_ROOT" \
    "Dumps from:  $DUMP_ROOT" \
    "Steps:" \
    "1. Frisches postgres:18.4 mit Superuser hochfahren" \
    "2. Globals einspielen (pg_dumpall --globals-only)" \
    "-> bekannter mailarchiver-Rollenkonflikt wird toleriert" \
    "3. DBs anlegen: paperless, mailarchiver, authelia, nextcloud, mealie" \
    "4. Per-DB pg_restore fuer jede DB" \
    "5. Tabellen-Count pro DB als Sanity-Check" \
    "6. Report schreiben"
  exit 0
fi
|
||||
|
||||
# Preconditions: Docker, the compose file and the mandatory dumps.
# NOTE(review): require_cmd / require_path come from common.sh and are
# presumably fatal when the check fails - confirm there.
require_cmd docker
for _required_path in \
  "$COMPOSE_FILE" \
  "$GLOBALS_DUMP" \
  "$PAPERLESS_DUMP" \
  "$MAILARCHIVER_DUMP"; do
  require_path "$_required_path"
done

# The authelia/nextcloud/mealie dumps are optional (they may be missing),
# so they are not checked here.
OPTIONAL_DUMPS=""

# Set to 1 only after the report has been written; read by cleanup().
RESTORE_SUCCESS=0
|
||||
#######################################
# Exit handler (installed via `trap cleanup EXIT`): tears down the test
# container and disposes of the restore directory.
# Globals:
#   COMPOSE_FILE    (read) compose file of the test instance
#   RESTORE_ROOT    (read) restore lab directory
#   RESTORE_SUCCESS (read) 1 once the drill finished successfully
#   KEEP_DATA       (read) 1 to keep the directory after success
# Returns:
#   Status of preserve_on_failure on a failed run, otherwise 0.
#######################################
cleanup() {
  # Best effort: the container may never have been created.
  docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true

  # Failed run: hand the directory over for inspection instead of deleting it.
  if [ "$RESTORE_SUCCESS" -ne 1 ]; then
    preserve_on_failure "shared-pg-cluster" "$RESTORE_ROOT"
    return
  fi

  if [ "$KEEP_DATA" -ne 1 ]; then
    # ':?' aborts instead of running rm -rf on an empty path; '--' ends option parsing.
    rm -rf -- "${RESTORE_ROOT:?RESTORE_ROOT must not be empty}"
  fi
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Always start from an empty restore lab; ':?' guards against an empty path.
rm -rf -- "${RESTORE_ROOT:?RESTORE_ROOT must not be empty}"
mkdir -p "$RESTORE_ROOT/data"

# Stage 1: bring up the test Postgres
docker compose -f "$COMPOSE_FILE" up -d restoretest-shared-pg >/dev/null

# Wait for readiness with an upper bound (90 probes, 2s apart) so a container
# that never comes up fails the drill instead of hanging it forever.
# The probe goes through TCP (-h 127.0.0.1): during first-time initialisation
# the image's entrypoint runs a temporary server that only listens on the
# unix socket, which a socket-based pg_isready would already report as ready.
pg_ready=0
for ((pg_probe = 1; pg_probe <= 90; pg_probe++)); do
  if docker exec restoretest-shared-pg \
    pg_isready -h 127.0.0.1 -U postgres >/dev/null 2>&1; then
    pg_ready=1
    break
  fi
  sleep 2
done
if [ "$pg_ready" -ne 1 ]; then
  echo "restoretest-shared-pg did not become ready in time" >&2
  docker logs --tail 50 restoretest-shared-pg >&2 || true
  exit 1
fi
# Extra grace period for the entrypoint init
sleep 3
|
||||
|
||||
# Stage 2: import globals
# The globals dump contains CREATE ROLE for every database user. The known
# conflict is that CREATE ROLE mailarchiver can fail when that user is also
# the dump admin. psql keeps going after such an error, so the import is only
# treated as failed when the log contains FATAL/PANIC.
globals_status="ok"
# Private, unpredictable log file instead of a fixed name in /tmp.
globals_log="$(mktemp "${TMPDIR:-/tmp}/shared-pg-globals.XXXXXX")"
docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser restoretest-shared-pg \
  psql -U postgres -f - <"$GLOBALS_DUMP" >"$globals_log" 2>&1 || true
if grep -qiE "FATAL|PANIC" "$globals_log"; then
  cat "$globals_log" >&2
  rm -f -- "$globals_log"
  exit 1
fi

# Errors other than "already exists" are still tolerated, but no longer
# silently: list them on stderr so an unexpected failure is visible.
globals_unexpected="$(grep -E 'ERROR:' "$globals_log" | grep -v 'already exists' || true)"
if [ -n "$globals_unexpected" ]; then
  echo "WARN: globals import reported errors other than 'already exists':" >&2
  printf '%s\n' "$globals_unexpected" >&2
fi
rm -f -- "$globals_log"
|
||||
|
||||
# Stage 3: create the databases and load the dumps
# Both maps are keyed by database name and are read when the report is built.
declare -A DB_STATUS
declare -A TABLE_COUNTS

#######################################
# Restores one custom-format dump into the test cluster and records the
# outcome.
# Globals:
#   DB_STATUS    (written) "ok", "skipped (dump missing)" or "failed..."
#   TABLE_COUNTS (written) number of tables in schema public, or "n/a"
# Arguments:
#   $1 - database name
#   $2 - role that owns the database (created if missing)
#   $3 - path of the dump file on the host
#   $4 - "yes" if the dump may be missing (default: "no")
# Outputs:
#   Writes the pg_restore error log to stderr when the restore fails.
# Returns:
#   0 when the restore succeeded or an optional dump was skipped, 1 otherwise.
#######################################
restore_db() {
  local dbname="$1"
  local dbuser="$2"
  local dump_path="$3"
  local optional="${4:-no}"
  local -a pg_exec=(docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser
    restoretest-shared-pg)
  local err_log outcome attempt db_exists count

  # Default for every path that does not reach the table count.
  TABLE_COUNTS[$dbname]="n/a"

  if [ ! -f "$dump_path" ]; then
    if [ "$optional" = "yes" ]; then
      DB_STATUS[$dbname]="skipped (dump missing)"
      return 0
    fi
    DB_STATUS[$dbname]="failed (dump missing)"
    return 1
  fi

  # Create the role unless the globals import already did (idempotent).
  "${pg_exec[@]}" psql -U postgres -c "DO \$\$ BEGIN CREATE ROLE $dbuser WITH LOGIN PASSWORD 'restoretest-$dbuser'; EXCEPTION WHEN duplicate_object THEN NULL; END \$\$;" >/dev/null 2>&1 || true

  # Create the database if it does not exist yet. -tA prints the bare value,
  # so the check does not depend on psql's table decoration.
  db_exists="$("${pg_exec[@]}" psql -U postgres -tAc \
    "SELECT 1 FROM pg_database WHERE datname='$dbname'" 2>/dev/null \
    | tr -d '[:space:]' || true)"
  if [ "$db_exists" != "1" ]; then
    # Best effort: a failure here surfaces as a pg_restore failure below.
    "${pg_exec[@]}" createdb -U postgres -O "$dbuser" "$dbname" 2>/dev/null || true
  fi

  # Private error log instead of a predictable name in /tmp.
  err_log="$(mktemp "${TMPDIR:-/tmp}/shared-pg-restore-${dbname}.XXXXXX")" || {
    DB_STATUS[$dbname]="failed (no temp file)"
    return 1
  }

  # pg_restore with retry: only "server not up yet" errors are retried.
  outcome="timeout"
  for ((attempt = 1; attempt <= 5; attempt++)); do
    if "${pg_exec[@]}" pg_restore -U postgres -d "$dbname" \
      --clean --if-exists --no-owner --no-privileges \
      <"$dump_path" 2>"$err_log"; then
      outcome="ok"
      break
    fi
    if grep -qiE "starting up|shutting down|connection refused" "$err_log"; then
      sleep 5
      continue
    fi
    if grep -qiE "FATAL|PANIC" "$err_log"; then
      outcome="failed"
    elif grep -qF "errors ignored on restore" "$err_log"; then
      # pg_restore ran to completion and only reports per-statement errors
      # it skipped: tolerated, but made visible.
      outcome="ok"
      echo "WARN: pg_restore for $dbname finished with tolerated errors:" >&2
      grep -F "errors ignored on restore" "$err_log" >&2 || true
    else
      # Non-zero exit without pg_restore's completion summary: the restore
      # aborted (unreadable archive, docker exec failure, ...). This must not
      # be reported as "ok".
      outcome="failed"
    fi
    break
  done

  if [ "$outcome" != "ok" ]; then
    if [ "$outcome" = "failed" ]; then
      DB_STATUS[$dbname]="failed"
    else
      DB_STATUS[$dbname]="failed (timeout)"
    fi
    cat "$err_log" >&2
    rm -f -- "$err_log"
    return 1
  fi
  rm -f -- "$err_log"

  DB_STATUS[$dbname]="ok"

  # Count the tables in schema public as a sanity figure for the report.
  count="$("${pg_exec[@]}" psql -U postgres -d "$dbname" -tAc \
    "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';" \
    2>/dev/null | tr -d '[:space:]' || echo "n/a")"
  TABLE_COUNTS[$dbname]="${count:-n/a}"
}
|
||||
|
||||
# Restore every database of the cluster. Each entry is "name|dump|optional";
# the owning role carries the same name as the database. The first two dumps
# are mandatory, the remaining three may be absent.
for restore_spec in \
  "paperless|$PAPERLESS_DUMP|no" \
  "mailarchiver|$MAILARCHIVER_DUMP|no" \
  "authelia|$AUTHELIA_DUMP|yes" \
  "nextcloud|$NEXTCLOUD_DUMP|yes" \
  "mealie|$MEALIE_DUMP|yes"; do
  IFS='|' read -r spec_db spec_dump spec_optional <<<"$restore_spec"
  restore_db "$spec_db" "$spec_db" "$spec_dump" "$spec_optional"
done
|
||||
|
||||
# Joins the lines read from stdin into one line, separated by ", ".
# (tr cannot do this: it maps single characters, so '\n' -> ', ' yields ','.)
join_lines_comma() {
  awk 'NR > 1 { printf ", " } { printf "%s", $0 }'
}

# Stage 4: read the data_checksums setting ("n/a" if the query fails)
checksums="$(docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser restoretest-shared-pg \
  psql -U postgres -tAc "SHOW data_checksums;" 2>/dev/null | tr -d '[:space:]' || echo "n/a")"

# Stage 5: list of non-template databases ("n/a" if the query fails)
db_list="$(docker exec -i -e PGPASSWORD=restoretest-shared-pg-superuser restoretest-shared-pg \
  psql -U postgres -tAc "SELECT datname FROM pg_database WHERE NOT datistemplate ORDER BY datname;" \
  2>/dev/null | join_lines_comma || echo "n/a")"
|
||||
|
||||
# Build the report
report_date="$(date +%F)"

# Wording for the last note: kept ("behalten") or cleaned up ("bereinigt").
if [ "$KEEP_DATA" -eq 1 ]; then
  test_data_fate="behalten"
else
  test_data_fate="bereinigt"
fi

# One markdown table row per database, each terminated by a newline.
per_db_rows=""
for report_db in paperless mailarchiver authelia nextcloud mealie; do
  per_db_rows+="| $report_db | \`${DB_STATUS[$report_db]}\` | \`${TABLE_COUNTS[$report_db]}\` |"$'\n'
done

report_body="# Shared PostgreSQL 18 Cluster Restore Drill - $report_date

- Dump source: \`$DUMP_ROOT\`
- Restore root: \`$RESTORE_ROOT\`
- Result: \`SUCCESS\`

## Checks

- Test-Postgres healthy: \`ok\`
- Globals import: \`$globals_status\`
- data_checksums: \`$checksums\`
- Databases: \`$db_list\`

## Per-DB Restore

| Database | Restore | Tables |
|---|---|---|
$per_db_rows
## Scope

Dieser Drill beweist, dass der gesamte Shared-PostgreSQL-18-Cluster aus
den taeglichen Dump-Artefakten wiederhergestellt werden kann: Globals
(Rollen) + per-DB Custom-Format-Dumps. Der bekannte mailarchiver-
Bootstrap-Rollenkonflikt wird toleriert.

## Notes

- Produktive PostgreSQL-Container und -Datenpfade wurden nicht beruehrt.
- Test-Postgres nutzt Wegwerf-Superuser-Passwort.
- Test-Daten wurden \`$test_data_fate\`.
"

# write_report receives the report on stdin; the here-string appends the same
# trailing newline a here-document would.
write_report "$REPORT_FILE" <<<"$report_body"

# From here on the EXIT handler treats the run as successful.
RESTORE_SUCCESS=1
echo "Shared PG cluster restore drill ok -> $REPORT_FILE"
|
||||
Reference in New Issue
Block a user