ops: add home assistant restore test

This commit is contained in:
2026-06-13 08:37:33 +02:00
parent 2eb8da1cd4
commit dfa3acc21e
4 changed files with 274 additions and 2 deletions
+2 -1
View File
@@ -59,7 +59,7 @@ Stand 2026-06-11 ist der Betrieb auf V1+ (validierte Bash-Host-Jobs mit ntfy):
# Frische-Check # Frische-Check
bash /mnt/user/services/homelab-infra/ops/restore-tests/run-restore-checks.sh freshness bash /mnt/user/services/homelab-infra/ops/restore-tests/run-restore-checks.sh freshness
# Dienst-Restore-Check (vaultwarden|gitea|paperless|immich|authelia|adguard|redis|komodo-bootstrap|nextcloud) # Dienst-Restore-Check (vaultwarden|gitea|paperless|immich|authelia|adguard|redis|homeassistant|komodo-bootstrap|nextcloud)
bash /mnt/user/services/homelab-infra/ops/restore-tests/run-restore-checks.sh <dienst> bash /mnt/user/services/homelab-infra/ops/restore-tests/run-restore-checks.sh <dienst>
# Negativtest des Alarmwegs (quartalsweise) # Negativtest des Alarmwegs (quartalsweise)
@@ -77,6 +77,7 @@ Einziger Status-Ort ist die **Reifegrad-Tabelle** in `docs/RESTORE_MATRIX.md`
- **Nextcloud:** Test am 2026-06-03 erfolgreich, aber mit Unraid-shfs-Eigenheit: Nextcloud fuehrt `chmod()` unter `/var/www/html` aus, was auf FUSE/shfs scheitert. Das Skript patcht `check_data_directory_permissions: false` und legt den `.ncdata`-Marker an. - **Nextcloud:** Test am 2026-06-03 erfolgreich, aber mit Unraid-shfs-Eigenheit: Nextcloud fuehrt `chmod()` unter `/var/www/html` aus, was auf FUSE/shfs scheitert. Das Skript patcht `check_data_directory_permissions: false` und legt den `.ncdata`-Marker an.
- **Authelia:** bewusst Config-Smoke ohne produktiven Dump-Restore (Storage-Encryption-Key-Kopplung). - **Authelia:** bewusst Config-Smoke ohne produktiven Dump-Restore (Storage-Encryption-Key-Kopplung).
- **Immich:** Foto-Dateien-Restore ist bewusst nicht Teil des Smokes (separater DR-Drill); Test-Postgres nutzt das produktive VectorChord-Image. - **Immich:** Foto-Dateien-Restore ist bewusst nicht Teil des Smokes (separater DR-Drill); Test-Postgres nutzt das produktive VectorChord-Image.
- **Home Assistant:** nutzt das neueste HA-native Backup-Artefakt und eine Kopie der Mosquitto-Appdata; Testcontainer laufen nur auf localhost-Ports, ohne Traefik/Public Route.
- **Unraid-Flash / Tailscale:** noch ohne vollstaendigen Erstlauf - `unraid-flash-runbook.md`, `tailscale-runbook.md`; offene Schritte in `docs/MASTER_TODO.md`. - **Unraid-Flash / Tailscale:** noch ohne vollstaendigen Erstlauf - `unraid-flash-runbook.md`, `tailscale-runbook.md`; offene Schritte in `docs/MASTER_TODO.md`.
## Naechste Ausbaustufen ## Naechste Ausbaustufen
@@ -0,0 +1,29 @@
services:
restoretest-ha-mosquitto:
image: eclipse-mosquitto:2.0.22@sha256:914f529386804c8278a4e581526b9be5e1604df44b30daabc70aa97dcefe5268
container_name: restoretest-ha-mosquitto
restart: "no"
volumes:
- ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/config/mosquitto.conf:/mosquitto/config/mosquitto.conf:ro
- ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/appdata/config:/mosquitto/external_config:ro
- ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/appdata/data:/mosquitto/data
- ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/appdata/log:/mosquitto/log
ports:
- "127.0.0.1:11883:1883"
security_opt:
- no-new-privileges:true
restoretest-homeassistant:
image: ghcr.io/home-assistant/home-assistant:2026.6.1@sha256:59aa8824955c9db491b75d2eebe42bd68494f80c2ec69ec0d66d9dae37d37514
container_name: restoretest-homeassistant
restart: "no"
depends_on:
- restoretest-ha-mosquitto
environment:
TZ: Europe/Berlin
volumes:
- ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/homeassistant/config:/config
ports:
- "127.0.0.1:18123:8123"
security_opt:
- no-new-privileges:true
+236
View File
@@ -0,0 +1,236 @@
#!/bin/bash
set -euo pipefail
# Home Assistant + Mosquitto Restore Smoke Test
#
# Scope:
# - Restore aus dem neuesten HA-nativen Backup-Artefakt
# - Kopie der Mosquitto-Appdata in ein isoliertes Restore-Lab
# - Kopie des Fachrepo-Clones zur Lesbarkeits-/Git-Status-Pruefung
# - Start isolierter Testcontainer auf localhost-Ports, ohne Traefik/Public Route
# - HA HTTP/API-Smoke und MQTT Publish/Subscribe + retained Topic nach Restart
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
. "$SCRIPT_DIR/common.sh"
WHATIF=0
KEEP_DATA=0
for arg in "$@"; do
case "$arg" in
--what-if) WHATIF=1 ;;
--keep-data) KEEP_DATA=1 ;;
*) echo "Unknown argument: $arg" >&2; exit 1 ;;
esac
done
RESTORE_ROOT="/mnt/user/backups/restore-lab/homeassistant"
REPORT_ROOT="/mnt/user/backups/restore-reports"
REPORT_FILE="$REPORT_ROOT/homeassistant-$(date +%F).md"
COMPOSE_FILE="$SCRIPT_DIR/homeassistant-compose.test.yml"
HA_BACKUP_DIR="/mnt/user/appdata/homeassistant/backups"
MOSQUITTO_APPDATA="/mnt/user/appdata/mosquitto"
MOSQUITTO_REPO_CONF="/mnt/user/services/homelab-infra/smart-home/mosquitto/config/mosquitto.conf"
FACHREPO_SOURCE="/mnt/user/services/smart-home-kalli"
HA_TOKEN_FILE="/mnt/user/appdata/secrets/ha_token_codex"
if [ "$WHATIF" -eq 1 ]; then
cat <<EOF
Home Assistant restore test
Mode: WhatIf
RestoreRoot: $RESTORE_ROOT
HA backup source: newest *.tar under $HA_BACKUP_DIR
Mosquitto source: $MOSQUITTO_APPDATA
Fachrepo source: $FACHREPO_SOURCE
Test endpoints: HA http://127.0.0.1:18123, MQTT 127.0.0.1:11883
Scope: HA backup extract + isolated HA boot + API token smoke + MQTT auth/retained smoke
EOF
exit 0
fi
require_cmd docker
require_cmd tar
require_cmd curl
require_path "$COMPOSE_FILE"
require_path "$HA_BACKUP_DIR"
require_path "$MOSQUITTO_APPDATA/config/passwordfile"
require_path "$MOSQUITTO_APPDATA/config/aclfile"
require_path "$MOSQUITTO_APPDATA/data"
require_path "$MOSQUITTO_REPO_CONF"
require_path "$FACHREPO_SOURCE"
require_path "$HA_TOKEN_FILE"
RESTORE_SUCCESS=0
cleanup() {
RESTORE_ROOT="$RESTORE_ROOT" cleanup_compose "$COMPOSE_FILE"
if [ "$RESTORE_SUCCESS" -ne 1 ]; then
preserve_on_failure "homeassistant" "$RESTORE_ROOT"
return
fi
if [ "$KEEP_DATA" -ne 1 ]; then
rm -rf "$RESTORE_ROOT"
fi
}
trap cleanup EXIT
latest_backup="$(find "$HA_BACKUP_DIR" -maxdepth 1 -type f -name '*.tar' -printf '%T@ %p\n' | sort -nr | awk 'NR==1 {print substr($0, index($0,$2))}')"
if [ -z "$latest_backup" ] || [ ! -f "$latest_backup" ]; then
echo "No HA native backup tar found under $HA_BACKUP_DIR" >&2
exit 1
fi
rm -rf "$RESTORE_ROOT"
mkdir -p \
"$RESTORE_ROOT/ha-backup" \
"$RESTORE_ROOT/homeassistant/config" \
"$RESTORE_ROOT/mosquitto/config" \
"$RESTORE_ROOT/mosquitto/appdata/config" \
"$RESTORE_ROOT/mosquitto/appdata/data" \
"$RESTORE_ROOT/mosquitto/appdata/log" \
"$RESTORE_ROOT/fachrepo"
tar -xf "$latest_backup" -C "$RESTORE_ROOT/ha-backup"
require_path "$RESTORE_ROOT/ha-backup/backup.json"
require_path "$RESTORE_ROOT/ha-backup/homeassistant.tar.gz"
tar -xzf "$RESTORE_ROOT/ha-backup/homeassistant.tar.gz" -C "$RESTORE_ROOT/homeassistant/config" --strip-components=1 data
cp "$MOSQUITTO_REPO_CONF" "$RESTORE_ROOT/mosquitto/config/mosquitto.conf"
cp -a "$MOSQUITTO_APPDATA/config/." "$RESTORE_ROOT/mosquitto/appdata/config/"
cp -a "$MOSQUITTO_APPDATA/data/." "$RESTORE_ROOT/mosquitto/appdata/data/"
if [ -d "$MOSQUITTO_APPDATA/log" ]; then
cp -a "$MOSQUITTO_APPDATA/log/." "$RESTORE_ROOT/mosquitto/appdata/log/" || true
fi
cp -a "$FACHREPO_SOURCE/." "$RESTORE_ROOT/fachrepo/"
ha_config="$RESTORE_ROOT/homeassistant/config"
require_path "$ha_config/configuration.yaml"
require_path "$ha_config/secrets.yaml"
require_path "$ha_config/trusted_proxies.yaml"
require_path "$ha_config/.storage/onboarding"
require_path "$ha_config/.storage/auth"
fachrepo_head="$(git -C "$RESTORE_ROOT/fachrepo" log -1 --oneline)"
fachrepo_status="$(git -C "$RESTORE_ROOT/fachrepo" status --short)"
if [ -n "$fachrepo_status" ]; then
echo "Restored fachrepo clone is not clean:" >&2
echo "$fachrepo_status" >&2
exit 1
fi
backup_size="$(stat -c '%s' "$latest_backup")"
ha_file_count="$(find "$ha_config" -type f | wc -l | tr -d ' ')"
ha_bytes="$(du -sb "$ha_config" | awk '{print $1}')"
mosquitto_data_bytes="$(du -sb "$RESTORE_ROOT/mosquitto/appdata" | awk '{print $1}')"
RESTORE_ROOT="$RESTORE_ROOT" docker compose -f "$COMPOSE_FILE" down >/dev/null 2>&1 || true
RESTORE_ROOT="$RESTORE_ROOT" docker compose -f "$COMPOSE_FILE" up -d >/dev/null
mqtt_user="$(sed -n 's/^mqtt_username:[[:space:]]*//p' "$ha_config/secrets.yaml" | sed "s/^['\"]//;s/['\"]$//")"
mqtt_pass="$(sed -n 's/^mqtt_password:[[:space:]]*//p' "$ha_config/secrets.yaml" | sed "s/^['\"]//;s/['\"]$//")"
if [ -z "$mqtt_user" ] || [ -z "$mqtt_pass" ]; then
echo "Missing mqtt_username or mqtt_password in restored HA secrets.yaml" >&2
exit 1
fi
mqtt_topic="restoretest/homeassistant/smoke"
mqtt_payload="ok-$(date +%s)"
mqtt_out="$RESTORE_ROOT/mqtt-sub.out"
rm -f "$mqtt_out"
docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$mqtt_topic" \
restoretest-ha-mosquitto sh -lc \
'mosquitto_sub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -C 1 -W 10' \
> "$mqtt_out" &
sub_pid=$!
sleep 1
docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$mqtt_topic" -e MQTT_PAYLOAD="$mqtt_payload" \
restoretest-ha-mosquitto sh -lc \
'mosquitto_pub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -m "$MQTT_PAYLOAD"'
wait "$sub_pid"
mqtt_result="$(cat "$mqtt_out")"
if [ "$mqtt_result" != "$mqtt_payload" ]; then
echo "MQTT publish/subscribe smoke failed" >&2
exit 1
fi
retained_topic="restoretest/homeassistant/retained"
retained_payload="retained-$(date +%s)"
docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$retained_topic" -e MQTT_PAYLOAD="$retained_payload" \
restoretest-ha-mosquitto sh -lc \
'mosquitto_pub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -m "$MQTT_PAYLOAD" -r'
docker restart restoretest-ha-mosquitto >/dev/null
sleep 3
retained_result="$(docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$retained_topic" \
restoretest-ha-mosquitto sh -lc \
'mosquitto_sub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -C 1 -W 10' | tr -d '\r')"
if [ "$retained_result" != "$retained_payload" ]; then
echo "MQTT retained smoke failed" >&2
exit 1
fi
ha_http_status=""
ha_body="$RESTORE_ROOT/ha-http-body.html"
for _ in $(seq 1 180); do
ha_http_status="$(curl -sS -o "$ha_body" -w '%{http_code}' http://127.0.0.1:18123/ || true)"
if [ "$ha_http_status" = "200" ] && grep -qi "Home Assistant" "$ha_body"; then
break
fi
sleep 1
done
if [ "$ha_http_status" != "200" ] || ! grep -qi "Home Assistant" "$ha_body"; then
echo "HA HTTP smoke failed, status=$ha_http_status" >&2
docker logs --tail 120 restoretest-homeassistant >&2 || true
exit 1
fi
ha_api_status="$(curl -sS -o "$RESTORE_ROOT/ha-api.json" -w '%{http_code}' \
-H "Authorization: Bearer $(cat "$HA_TOKEN_FILE")" \
-H 'Content-Type: application/json' \
http://127.0.0.1:18123/api/ || true)"
if [ "$ha_api_status" != "200" ]; then
echo "HA API token smoke failed, status=$ha_api_status" >&2
exit 1
fi
RESTORE_ROOT="$RESTORE_ROOT" docker compose -f "$COMPOSE_FILE" exec -T restoretest-homeassistant \
python -m homeassistant --script check_config --config /config >/tmp/restoretest-ha-check-config.out
write_report "$REPORT_FILE" <<EOF
# Home Assistant Restore Test Report - $(date +%F)
- Service: \`homeassistant\` + \`smarthome-mosquitto\`
- HA backup source: \`$latest_backup\`
- Restore root: \`$RESTORE_ROOT\`
- Test containers:
- \`restoretest-homeassistant\`
- \`restoretest-ha-mosquitto\`
- Test endpoints:
- HA: \`http://127.0.0.1:18123\`
- MQTT: \`127.0.0.1:11883\`
- Result: \`SUCCESS\`
## Checks
- HA-native backup tar readable: \`ok\`
- HA inner archive restored: \`ok\`
- HA backup size bytes: \`$backup_size\`
- Restored HA file count: \`$ha_file_count\`
- Restored HA bytes: \`$ha_bytes\`
- Restored Mosquitto appdata bytes: \`$mosquitto_data_bytes\`
- Fachrepo clone clean: \`ok\`
- Fachrepo HEAD: \`$fachrepo_head\`
- HA HTTP status: \`$ha_http_status\`
- HA API token smoke: \`$ha_api_status\`
- HA check_config: \`ok\`
- MQTT publish/subscribe with restored credentials: \`ok\`
- MQTT retained topic after broker restart: \`ok\`
## Notes
- Productive \`homeassistant\` and \`smarthome-mosquitto\` containers were not used.
- Test ran without Traefik and without the productive domain.
- Test ports were bound to localhost only.
- Token and MQTT password values were used for smoke tests but not printed.
- Test data was cleaned after success: \`$([ "$KEEP_DATA" -eq 1 ] && echo no || echo yes)\`
EOF
RESTORE_SUCCESS=1
echo "Home Assistant restore test ok -> $REPORT_FILE"
+7 -1
View File
@@ -55,6 +55,12 @@ case "$MODE" in
fi fi
exec "$SCRIPT_DIR/redis-restore-test.sh" exec "$SCRIPT_DIR/redis-restore-test.sh"
;; ;;
homeassistant)
if [ "$WHATIF" = "--what-if" ]; then
exec "$SCRIPT_DIR/homeassistant-restore-test.sh" --what-if
fi
exec "$SCRIPT_DIR/homeassistant-restore-test.sh"
;;
nextcloud) nextcloud)
if [ "$WHATIF" = "--what-if" ]; then if [ "$WHATIF" = "--what-if" ]; then
exec "$SCRIPT_DIR/nextcloud-restore-test.sh" --what-if exec "$SCRIPT_DIR/nextcloud-restore-test.sh" --what-if
@@ -98,7 +104,7 @@ case "$MODE" in
exec "$SCRIPT_DIR/shared-pg-cluster-restore-test.sh" exec "$SCRIPT_DIR/shared-pg-cluster-restore-test.sh"
;; ;;
*) *)
echo "Usage: $0 {freshness|freshness-negative|vaultwarden|gitea|paperless|immich|authelia|adguard|redis|nextcloud|komodo-bootstrap|komodo-mongo-restore|traefik|mailarchiver|mealie|shared-pg-cluster} [--what-if]" >&2 echo "Usage: $0 {freshness|freshness-negative|vaultwarden|gitea|paperless|immich|authelia|adguard|redis|homeassistant|nextcloud|komodo-bootstrap|komodo-mongo-restore|traefik|mailarchiver|mealie|shared-pg-cluster} [--what-if]" >&2
exit 1 exit 1
;; ;;
esac esac