Files
homelab-infra/ops/restore-tests/immich-restore-test.sh
T

248 lines
7.7 KiB
Bash
Executable File

#!/bin/bash
set -euo pipefail
# Immich Restore Smoke Test
#
# Usage: immich-restore-test.sh [--what-if] [--keep-data]
#
# Non-destructive restore smoke test for Immich.
# - reads immich.dump from the production Borg archive
# - imports it into an isolated test Postgres instance that uses the same
#   VectorChord image as production
# - starts an isolated Immich server container without Traefik and
#   without the ML container
# - checks the login page and the asset count from the DB
# - cleans up afterwards
#
# The production Immich stack is NOT touched.
# Production photo paths under /mnt/user/photos/* are NOT mounted.
# Absolute directory of this script, so sibling files resolve regardless of
# the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Shared helpers live next to this script.
source "$SCRIPT_DIR/common.sh"

# Command-line switches:
#   --what-if    print the plan and exit without doing anything
#   --keep-data  keep the restore directory after the run
WHATIF=0
KEEP_DATA=0
for opt; do
  if [[ "$opt" == "--what-if" ]]; then
    WHATIF=1
  elif [[ "$opt" == "--keep-data" ]]; then
    KEEP_DATA=1
  else
    echo "Unknown argument: $opt" >&2
    exit 1
  fi
done
# Fixed locations used by this restore test.
RESTORE_ROOT="/mnt/user/backups/restore-lab/immich"
REPORT_ROOT="/mnt/user/backups/restore-reports"
REPORT_FILE="$REPORT_ROOT/immich-$(date +%F).md"
# BORG_RESTORE_HOST_ROOT is not assigned in this file; presumably common.sh
# provides it.
EXTRACT_DIR="$BORG_RESTORE_HOST_ROOT/immich-extract"
COMPOSE_FILE="$SCRIPT_DIR/immich-compose.test.yml"

# Prints the dry-run plan to stdout.
print_whatif_plan() {
  cat <<EOF
Immich restore test
Mode: WhatIf
RestoreRoot: $RESTORE_ROOT
ReportRoot: $REPORT_ROOT
Expected Borg source paths:
- local/borg-dumps/latest/immich.dump
Planned isolation:
- Test-Postgres: ghcr.io/immich-app/postgres:14-vectorchord0.4.3-pgvectors0.2.0
- Test-Redis: redis:8.8.0-alpine (rebuildbar, kein Restore)
- Test-Server: ghcr.io/immich-app/immich-server:release (Image-Pin wie Produktion)
- ML-Container bewusst weggelassen
- Test-Upload: leer, unter $RESTORE_ROOT/upload
- Productive photo paths NOT mounted: /mnt/user/photos/immich, /mnt/user/photos/family_archive
- Test endpoint: 127.0.0.1:12283 (no Traefik, no public domain)
Smoke-Test:
- Test-Postgres healthy
- pg_restore -Fc -> immich.dump
- HTTP 200/302/3xx von 127.0.0.1:12283
- Asset-Count aus DB
EOF
}

# In what-if mode only describe the plan; nothing is started or deleted.
if [[ "$WHATIF" == 1 ]]; then
  print_whatif_plan
  exit 0
fi
# Verify prerequisites before touching anything. require_cmd / require_path
# are not defined in this file (presumably they come from common.sh).
for tool in docker curl; do
  require_cmd "$tool"
done
for needed_path in "$BORG_PASSPHRASE_FILE_DEFAULT" "$COMPOSE_FILE"; do
  require_path "$needed_path"
done
#######################################
# Tear down the test stack and remove scratch data.
# Registered as the EXIT trap, so it runs on every exit path.
#
# The script runs with `set -e`, which also applies inside trap handlers:
# an unguarded failing step would abort the handler and skip the remaining
# steps. Every step is therefore guarded so that a compose teardown failure
# can no longer leave the restored database dump on disk.
#
# Globals:  COMPOSE_FILE, KEEP_DATA, RESTORE_ROOT, EXTRACT_DIR (read)
# Outputs:  a warning on stderr if the compose teardown fails
# Returns:  0 if every step succeeded, otherwise the status of the last
#           step that failed
#######################################
cleanup() {
  local rc=0

  # cleanup_compose is not defined in this file (presumably common.sh).
  cleanup_compose "$COMPOSE_FILE" || {
    rc=$?
    echo "Warning: cleanup_compose failed (status $rc); removing test data anyway" >&2
  }

  # ${VAR:?} aborts instead of expanding to an empty path for rm -rf.
  if [ "$KEEP_DATA" -ne 1 ]; then
    rm -rf -- "${RESTORE_ROOT:?}" || rc=$?
  fi
  rm -rf -- "${EXTRACT_DIR:?}" || rc=$?

  return "$rc"
}
trap cleanup EXIT
# Start from a clean slate: drop leftovers of earlier runs, then recreate
# the directory layout used by the test.
rm -rf "$EXTRACT_DIR" "$RESTORE_ROOT"
for subdir in postgres upload dumps/latest; do
  mkdir -p "$RESTORE_ROOT/$subdir"
done

# Resolve the Borg archive and repository. Both helpers are defined outside
# this file; per the error message below they read the borg-ui database.
archive="$(latest_archive_name)"
repo="$(borg_repo_url)"
if [[ -z "$archive" || -z "$repo" ]]; then
  echo "Could not resolve Borg repo/archive from borg-ui database" >&2
  exit 1
fi

# Extract only the Immich dump and move it into the restore root.
dump_in_archive="local/borg-dumps/latest/immich.dump"
borg_extract "/restore/immich-extract" "$dump_in_archive"
mv "$EXTRACT_DIR/$dump_in_archive" "$RESTORE_ROOT/dumps/latest/immich.dump"
# Stage 1: start the test Postgres and test Redis containers.
docker compose -f "$COMPOSE_FILE" up -d \
  restoretest-immich-postgres restoretest-immich-redis >/dev/null

# Wait for Postgres to accept connections, but give up after roughly 120s
# (60 probes, 2s apart) so that a container which crashes on startup fails
# the test instead of hanging it forever.
pg_ready=0
for _ in {1..60}; do
  if docker exec restoretest-immich-postgres \
    pg_isready -U immich -d immich >/dev/null 2>&1; then
    pg_ready=1
    break
  fi
  sleep 2
done
if [ "$pg_ready" -ne 1 ]; then
  echo "Test Postgres did not become ready in time" >&2
  docker ps -a --filter name=restoretest-immich >&2 || true
  docker logs --tail 120 restoretest-immich-postgres >&2 || true
  exit 1
fi

# Some Postgres images already report "ready" while the database requested
# via ENV is still being created by the entrypoint. The smoke test therefore
# creates the isolated test DB defensively and only tolerates the race in
# which the DB was created in parallel ("already exists").
db_ok=0
for _ in {1..12}; do
  if docker exec restoretest-immich-postgres sh -lc \
    'createdb -U immich immich 2>/tmp/immich-createdb.err || grep -q "already exists" /tmp/immich-createdb.err'; then
    db_ok=1
    break
  fi
  sleep 5
done
if [ "$db_ok" -ne 1 ]; then
  # Show the last createdb error captured inside the container.
  docker exec restoretest-immich-postgres sh -lc 'cat /tmp/immich-createdb.err >&2' || true
  exit 1
fi
# Stage 2: import the dump into the test Postgres.
# Shortly after reporting "ready", the Postgres entrypoint may still switch
# from its init server to the final server; pg_restore therefore tolerates
# only transient startup/shutdown errors and retries afterwards.
#
# pg_restore's stderr is captured in a private mktemp file instead of a
# fixed, predictable name under /tmp, and the file is removed on every path
# out of this stage.
restore_err="$(mktemp "${TMPDIR:-/tmp}/immich-pg-restore.XXXXXX")"
restore_ok=0
for _ in {1..12}; do
  if docker exec -i restoretest-immich-postgres \
    pg_restore -U immich -d immich --clean --if-exists --no-owner --no-privileges \
    < "$RESTORE_ROOT/dumps/latest/immich.dump" 2>"$restore_err"; then
    restore_ok=1
    break
  fi
  # Anything other than a known transient error is final: stop retrying.
  if ! grep -qiE "starting up|shutting down|connection refused|database .* does not exist" "$restore_err"; then
    break
  fi
  sleep 5
done
if [ "$restore_ok" -ne 1 ]; then
  cat "$restore_err" >&2 || true
  rm -f -- "$restore_err"
  exit 1
fi
rm -f -- "$restore_err"
# Since v2, Immich checks for system folder markers under UPLOAD_LOCATION.
# The smoke test deliberately mounts no production photo paths, so an empty
# test structure with the expected markers is created instead.
upload_root="$RESTORE_ROOT/upload"
marker_dirs=(thumbs upload backups library profile encoded-video)
for marker_dir in "${marker_dirs[@]}"; do
  mkdir -p "$upload_root/$marker_dir"
  touch "$upload_root/$marker_dir/.immich"
done
# Make the tree readable and writable for every user.
chmod -R a+rwX "$upload_root"
# Stage 3: start the Immich server (without ML).
docker compose -f "$COMPOSE_FILE" up -d restoretest-immich-server >/dev/null

# The response body goes to a private mktemp file instead of a fixed,
# predictable name under /tmp; it is removed once the marker check is done.
body_file="$(mktemp "${TMPDIR:-/tmp}/immich-body.XXXXXX")"

# On first start the Immich server needs some time for DB migration checks.
# Poll the HTTP endpoint for up to 120s. Each request is capped with
# --connect-timeout/--max-time so a stalled connection cannot block the
# loop past that budget by more than one request.
http_status=""
http_ok=0
deadline=$((SECONDS + 120))
while :; do
  # curl prints the final status code (after -L redirects); a failed request
  # must not abort the script under `set -e`.
  http_status="$(curl -s --connect-timeout 5 --max-time 10 \
    -o "$body_file" -w '%{http_code}' -L http://127.0.0.1:12283 || true)"
  case "$http_status" in
    200 | 302 | 303)
      http_ok=1
      break
      ;;
  esac
  if ((SECONDS >= deadline)); then
    break
  fi
  sleep 2
done

# Body check: the Immich UI carries typical markers; match them tolerantly.
body_check="ok"
if ! grep -qiE "immich|login|signin" "$body_file" 2>/dev/null; then
  body_check="missing-marker"
fi
rm -f -- "$body_file"

if [ "$http_ok" -ne 1 ]; then
  echo "Immich HTTP smoke failed: status=$http_status" >&2
  docker ps -a --filter name=restoretest-immich >&2 || true
  docker logs --tail 120 restoretest-immich-server >&2 || true
  exit 1
fi
if [ "$body_check" != "ok" ]; then
  echo "Immich HTTP smoke failed: body marker=$body_check" >&2
  docker logs --tail 120 restoretest-immich-server >&2 || true
  exit 1
fi
# Asset count from the DB. Immich v2 uses singular table names (`asset`,
# `"user"`); older schema versions are tried tolerantly as a fallback.
#
# query_count SQL
#   Runs SQL via psql inside the restoretest-immich-postgres container and
#   prints the result with all whitespace removed. psql errors are silenced;
#   a failed query simply yields whatever (usually empty) output it produced.
#   Always returns 0.
query_count() {
  local statement="$1"
  local raw
  raw="$(docker exec restoretest-immich-postgres \
    psql -U immich -d immich -tAc "$statement" 2>/dev/null)" || true
  printf '%s' "${raw//[[:space:]]/}"
}
# Prints the result of the first query (in argument order) that yields a
# non-empty value via query_count, or "n/a" if none does.
first_nonempty_count() {
  local candidate_sql result
  for candidate_sql in "$@"; do
    result="$(query_count "$candidate_sql")"
    if [ -n "$result" ]; then
      printf '%s' "$result"
      return 0
    fi
  done
  printf '%s' "n/a"
}

# Asset count: try the singular table name first, then the plural one.
asset_count="$(first_nonempty_count \
  'select count(*) from asset;' \
  'select count(*) from assets;')"

# User count as an additional DB sanity check, with the same fallback.
user_count="$(first_nonempty_count \
  'select count(*) from "user";' \
  'select count(*) from users;')"
# Values interpolated into the report below.
report_day="$(date +%F)"
if [ "$KEEP_DATA" -eq 1 ]; then
  data_cleaned="no"
else
  data_cleaned="yes"
fi

# Write the Markdown report. write_report is not defined in this file
# (presumably common.sh); it receives the target path as argument and the
# report text on stdin.
write_report "$REPORT_FILE" <<EOF
# Immich Restore Test Report - $report_day
- Service: \`immich\`
- Source repo: \`$repo\`
- Archive: \`$archive\`
- Restore root: \`$RESTORE_ROOT\`
- Test containers:
- \`restoretest-immich-server\`
- \`restoretest-immich-postgres\` (ghcr.io/immich-app/postgres:14-vectorchord0.4.3-pgvectors0.2.0)
- \`restoretest-immich-redis\`
- Test endpoint: \`http://127.0.0.1:12283\`
- ML container: deliberately omitted
- Result: \`SUCCESS\`
## Checks
- Borg extract of \`immich.dump\`: \`ok\`
- Dump import into isolated Postgres: \`ok\`
- HTTP status after redirect: \`$http_status\`
- Login page marker: \`$body_check\`
- Asset count in test DB: \`$asset_count\`
- User count in test DB: \`$user_count\`
## Notes
- Test ran without Traefik and without the productive domain.
- Productive photo paths under /mnt/user/photos/* were NOT mounted.
- Test data was cleaned after success: \`$data_cleaned\`
- Restore-Quelle Dump: \`local/borg-dumps/latest/immich.dump\` aus aktuellem Borg-Archiv.
EOF
printf '%s\n' "Immich restore test ok -> $REPORT_FILE"