#!/bin/bash
set -euo pipefail

# Shared PostgreSQL 18 Cluster Restore Drill
#
# Proves that the complete shared Postgres cluster can be restored from the
# dump artifacts:
#   1. Globals (roles) from pg_dumpall --globals-only
#   2. Per-DB custom-format dumps: paperless, mailarchiver, authelia,
#      nextcloud, mealie
#
# Known special case (docs/RESTORE_MATRIX.md):
#   - CREATE ROLE mailarchiver fails because that user is also the dump
#     admin user. The ALTER ROLE that follows must still go through.
#     The drill tolerates this error.
#
# Production PostgreSQL containers and data paths are NOT touched.
#
# Options:
#   --what-if    print the planned steps and exit 0 without doing anything
#   --keep-data  do not delete the restore root after a successful run

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Provides require_cmd, require_path, preserve_on_failure and write_report.
. "$SCRIPT_DIR/common.sh"

WHATIF=0
KEEP_DATA=0
for arg in "$@"; do
  case "$arg" in
    --what-if) WHATIF=1 ;;
    --keep-data) KEEP_DATA=1 ;;
    *)
      echo "Unknown argument: $arg" >&2
      exit 1
      ;;
  esac
done

RESTORE_ROOT="/mnt/user/backups/restore-lab/shared-pg-cluster"
REPORT_ROOT="/mnt/user/backups/restore-reports"
COMPOSE_FILE="$SCRIPT_DIR/shared-pg-cluster-compose.test.yml"
REPORT_FILE="$REPORT_ROOT/shared-pg-cluster-$(date +%F).md"
DUMP_ROOT="/mnt/user/backups/borg/dumps/latest"

# Throw-away test container and its superuser password.
PG_CONTAINER="restoretest-shared-pg"
PG_SUPERUSER_PASSWORD="restoretest-shared-pg-superuser"

# Upper bound for the readiness wait: 90 polls x 2 s = about 3 minutes.
READY_MAX_ATTEMPTS=90

# All expected dumps
GLOBALS_DUMP="$DUMP_ROOT/postgresql17-globals.sql"
PAPERLESS_DUMP="$DUMP_ROOT/postgresql17-paperless.dump"
MAILARCHIVER_DUMP="$DUMP_ROOT/postgresql17-mailarchiver.dump"
AUTHELIA_DUMP="$DUMP_ROOT/postgresql17-authelia.dump"
NEXTCLOUD_DUMP="$DUMP_ROOT/nextcloud.dump"
MEALIE_DUMP="$DUMP_ROOT/mealie.dump"

if [ "$WHATIF" -eq 1 ]; then
  # NOTE(review): the headline and items 1-2 of this text were lost in the
  # reviewed copy of the file and are reconstructed here; items 3-6 are
  # original. Confirm the wording against version control.
  cat <<EOF
[WHAT-IF] Shared PostgreSQL 18 Cluster Restore Drill
  1. Test-Postgres starten: $COMPOSE_FILE
  2. Globals einspielen: $GLOBALS_DUMP
     -> bekannter mailarchiver-Rollenkonflikt wird toleriert
  3. DBs anlegen: paperless, mailarchiver, authelia, nextcloud, mealie
  4. Per-DB pg_restore fuer jede DB
  5. Tabellen-Count pro DB als Sanity-Check
  6. Report schreiben
EOF
  exit 0
fi

require_cmd docker
require_path "$COMPOSE_FILE"
require_path "$GLOBALS_DUMP"
require_path "$PAPERLESS_DUMP"
require_path "$MAILARCHIVER_DUMP"

# The Authelia/Nextcloud/Mealie dumps are optional (they may be missing).
# NOTE(review): OPTIONAL_DUMPS is not read anywhere in this script; kept in
# case a sourced helper relies on it.
OPTIONAL_DUMPS=""

RESTORE_SUCCESS=0

# Private scratch directory for psql/pg_restore logs. Created with mktemp so
# no predictable file name in /tmp is ever written to.
LOG_DIR="$(mktemp -d "${TMPDIR:-/tmp}/shared-pg-restore.XXXXXX")"

# EXIT handler: always tears down the test stack. On failure the restore root
# is handed to preserve_on_failure and the logs are kept; on success the logs
# are removed and the restore root is deleted unless --keep-data was given.
cleanup() {
  docker compose -f "$COMPOSE_FILE" down -v >/dev/null 2>&1 || true
  if [ "$RESTORE_SUCCESS" -ne 1 ]; then
    echo "Restore logs kept in: $LOG_DIR" >&2
    preserve_on_failure "shared-pg-cluster" "$RESTORE_ROOT"
    return
  fi
  rm -rf -- "${LOG_DIR:?}"
  if [ "$KEEP_DATA" -ne 1 ]; then
    rm -rf -- "${RESTORE_ROOT:?}"
  fi
}
trap cleanup EXIT

# Runs a command inside the test container with the superuser password set.
# stdin is forwarded (-i) so dumps can be piped in; callers that do not feed
# stdin should redirect it from /dev/null.
pg_exec() {
  docker exec -i -e PGPASSWORD="$PG_SUPERUSER_PASSWORD" "$PG_CONTAINER" "$@"
}

# Runs one SQL statement against database $1 and prints the unaligned,
# tuples-only result. psql's stderr is discarded; the exit status is psql's.
pg_query() {
  local db="$1"
  local sql="$2"
  pg_exec psql -U postgres -d "$db" -tAc "$sql" 2>/dev/null </dev/null
}

rm -rf -- "${RESTORE_ROOT:?}"
mkdir -p "$RESTORE_ROOT/data"

# Stage 1: bring up the test Postgres
docker compose -f "$COMPOSE_FILE" up -d "$PG_CONTAINER" >/dev/null

# Poll until the server accepts connections, but give up after
# READY_MAX_ATTEMPTS polls instead of hanging forever on a broken container.
pg_ready=0
for ((poll = 0; poll < READY_MAX_ATTEMPTS; poll++)); do
  if docker exec "$PG_CONTAINER" pg_isready -U postgres >/dev/null 2>&1; then
    pg_ready=1
    break
  fi
  sleep 2
done
if [ "$pg_ready" -ne 1 ]; then
  echo "Test-Postgres did not become ready in time: $PG_CONTAINER" >&2
  exit 1
fi
# Extra wait for the entrypoint init
sleep 3

# Stage 2: import globals
# The globals dump contains CREATE ROLE for all DB users. The known conflict
# is that CREATE ROLE mailarchiver can fail when that user is also the dump
# admin. psql's exit status is therefore ignored and only FATAL/PANIC in the
# log counts as a failure.
globals_status="ok"
globals_log="$LOG_DIR/globals.log"
pg_exec psql -U postgres -f - <"$GLOBALS_DUMP" >"$globals_log" 2>&1 || true
if grep -qiE "FATAL|PANIC" "$globals_log"; then
  globals_status="failed (FATAL/PANIC)"
  cat "$globals_log" >&2
  exit 1
fi

# Stage 3: create the databases and import the dumps
declare -A DB_STATUS
declare -A TABLE_COUNTS

# Restores one database into the test cluster.
# Arguments:
#   $1 - database name
#   $2 - owning role (created with a throw-away password if missing)
#   $3 - path to the custom-format dump
#   $4 - "yes" if a missing dump is acceptable (default: "no")
# Globals:
#   DB_STATUS, TABLE_COUNTS (written, keyed by database name); LOG_DIR (read)
# Returns:
#   0 on success or when an optional dump is missing; 1 otherwise. Callers run
#   under `set -e`, so a non-zero return aborts the drill.
restore_db() {
  local dbname="$1"
  local dbuser="$2"
  local dump_path="$3"
  local optional="${4:-no}"
  local err_log="$LOG_DIR/restore-${dbname}.err"
  local db_exists table_count attempt
  local restore_ok=0

  if [ ! -f "$dump_path" ]; then
    TABLE_COUNTS["$dbname"]="n/a"
    if [ "$optional" = "yes" ]; then
      DB_STATUS["$dbname"]="skipped (dump missing)"
      return 0
    fi
    DB_STATUS["$dbname"]="failed (dump missing)"
    return 1
  fi

  # Create the role if the globals import did not (idempotent). Best effort:
  # a failure here surfaces later in pg_restore.
  pg_exec psql -U postgres -c "DO \$\$ BEGIN CREATE ROLE $dbuser WITH LOGIN PASSWORD 'restoretest-$dbuser'; EXCEPTION WHEN duplicate_object THEN NULL; END \$\$;" \
    >/dev/null 2>&1 </dev/null || true

  # Create the database unless it already exists. The tuples-only query
  # yields exactly "1" for an existing database, so the check cannot be
  # fooled by other digits in psql's aligned output.
  db_exists="$(pg_query postgres "SELECT 1 FROM pg_database WHERE datname='$dbname'" || true)"
  if [ "$db_exists" != "1" ]; then
    pg_exec createdb -U postgres -O "$dbuser" "$dbname" 2>/dev/null </dev/null || true
  fi

  # pg_restore with retry: up to 5 attempts while the server reports that it
  # is starting up, shutting down or refusing connections.
  for attempt in {1..5}; do
    if pg_exec pg_restore -U postgres -d "$dbname" --clean --if-exists --no-owner --no-privileges \
      <"$dump_path" 2>"$err_log"; then
      restore_ok=1
      break
    fi
    if grep -qiE "starting up|shutting down|connection refused" "$err_log"; then
      sleep 5
      continue
    fi
    # --clean produces "does not exist" warnings on a first import, so a
    # non-zero exit without FATAL/PANIC is deliberately accepted.
    if grep -qiE "FATAL|PANIC" "$err_log"; then
      DB_STATUS["$dbname"]="failed"
      TABLE_COUNTS["$dbname"]="n/a"
      cat "$err_log" >&2
      return 1
    fi
    restore_ok=1
    break
  done

  if [ "$restore_ok" -ne 1 ]; then
    DB_STATUS["$dbname"]="failed (timeout)"
    TABLE_COUNTS["$dbname"]="n/a"
    return 1
  fi

  DB_STATUS["$dbname"]="ok"

  # Count tables in the public schema as a sanity check; "n/a" if the query
  # fails.
  table_count="$(pg_query "$dbname" "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';" | tr -d '[:space:]')" ||
    table_count="n/a"
  TABLE_COUNTS["$dbname"]="$table_count"
}

restore_db "paperless" "paperless" "$PAPERLESS_DUMP"
restore_db "mailarchiver" "mailarchiver" "$MAILARCHIVER_DUMP"
restore_db "authelia" "authelia" "$AUTHELIA_DUMP" "yes"
restore_db "nextcloud" "nextcloud" "$NEXTCLOUD_DUMP" "yes"
restore_db "mealie" "mealie" "$MEALIE_DUMP" "yes"

# Stage 4: check data_checksums
checksums="$(pg_query postgres "SHOW data_checksums;" | tr -d '[:space:]')" ||
  checksums="n/a"

# Stage 5: database list, joined as "a, b, c". paste joins the lines with
# commas (tr cannot map one newline to the two characters ", ").
db_list="$(pg_query postgres "SELECT datname FROM pg_database WHERE NOT datistemplate ORDER BY datname;" |
  paste -sd, - | sed 's/,/, /g')" ||
  db_list="n/a"

# Build the report
report_body="# Shared PostgreSQL 18 Cluster Restore Drill - $(date +%F)

- Dump source: \`$DUMP_ROOT\`
- Restore root: \`$RESTORE_ROOT\`
- Result: \`SUCCESS\`

## Checks

- Test-Postgres healthy: \`ok\`
- Globals import: \`$globals_status\`
- data_checksums: \`$checksums\`
- Databases: \`$db_list\`

## Per-DB Restore

| Database | Restore | Tables |
|---|---|---|
| paperless | \`${DB_STATUS[paperless]}\` | \`${TABLE_COUNTS[paperless]}\` |
| mailarchiver | \`${DB_STATUS[mailarchiver]}\` | \`${TABLE_COUNTS[mailarchiver]}\` |
| authelia | \`${DB_STATUS[authelia]}\` | \`${TABLE_COUNTS[authelia]}\` |
| nextcloud | \`${DB_STATUS[nextcloud]}\` | \`${TABLE_COUNTS[nextcloud]}\` |
| mealie | \`${DB_STATUS[mealie]}\` | \`${TABLE_COUNTS[mealie]}\` |

## Scope

Dieser Drill beweist, dass der gesamte Shared-PostgreSQL-18-Cluster aus den
taeglichen Dump-Artefakten wiederhergestellt werden kann: Globals (Rollen) +
per-DB Custom-Format-Dumps. Der bekannte mailarchiver-
Bootstrap-Rollenkonflikt wird toleriert.

## Notes

- Produktive PostgreSQL-Container und -Datenpfade wurden nicht beruehrt.
- Test-Postgres nutzt Wegwerf-Superuser-Passwort.
- Test-Daten wurden \`$([ "$KEEP_DATA" -eq 1 ] && echo behalten || echo bereinigt)\`.
"

# NOTE(review): everything after `write_report "$REPORT_FILE" <` was lost in
# the reviewed copy of the file. The heredoc body, the success flag and the
# final message below are reconstructed; confirm against version control.
write_report "$REPORT_FILE" <<EOF
$report_body
EOF

# Tells the EXIT handler that the drill passed, so it cleans up instead of
# preserving the restore root.
RESTORE_SUCCESS=1
echo "Shared PostgreSQL cluster restore drill OK -> $REPORT_FILE"