From dfa3acc21e68e2136a5b925d2732a242fbe05ac4 Mon Sep 17 00:00:00 2001
From: Micha
Date: Sat, 13 Jun 2026 08:37:33 +0200
Subject: [PATCH] ops: add home assistant restore test

---
Review notes (placed between the three-dash line and the diffstat, so they
are not part of the commit message or of any file content):
- This copy of the patch had lost its line breaks. Line structure,
  indentation and the position of blank context lines were re-derived from
  the hunk counts; verify them against the tree before applying.
- Text between "<" and ">" appears to be missing in several places and is
  reproduced as found: the author address, the last argument of the second
  README command, `cat <&2` in the what-if branch (presumably everything up
  to the check that sets and validates $latest_backup, which is used later
  but never assigned in the visible text) and
  `write_report "$REPORT_FILE" < $REPORT_FILE"` at the end of the script.
  TODO: restore these spans from the original commit.
- homeassistant-restore-test.sh differs from the recorded blob in its header
  comments (translated to English) and in three lines changed in place
  (guarded rm, two `|| true`); the line count is unchanged, but the blob id
  f22f60a in the index line no longer matches.

 ops/restore-tests/README.md                   |   3 +-
 .../homeassistant-compose.test.yml            |  29 +++
 .../homeassistant-restore-test.sh             | 236 ++++++++++++++++++
 ops/restore-tests/run-restore-checks.sh       |   8 +-
 4 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 ops/restore-tests/homeassistant-compose.test.yml
 create mode 100755 ops/restore-tests/homeassistant-restore-test.sh

diff --git a/ops/restore-tests/README.md b/ops/restore-tests/README.md
index 034299a..19c0b66 100644
--- a/ops/restore-tests/README.md
+++ b/ops/restore-tests/README.md
@@ -59,7 +59,7 @@ Stand 2026-06-11 ist der Betrieb auf V1+ (validierte Bash-Host-Jobs mit ntfy):
 # Frische-Check
 bash /mnt/user/services/homelab-infra/ops/restore-tests/run-restore-checks.sh freshness
 
-# Dienst-Restore-Check (vaultwarden|gitea|paperless|immich|authelia|adguard|redis|komodo-bootstrap|nextcloud)
+# Dienst-Restore-Check (vaultwarden|gitea|paperless|immich|authelia|adguard|redis|homeassistant|komodo-bootstrap|nextcloud)
 bash /mnt/user/services/homelab-infra/ops/restore-tests/run-restore-checks.sh
 
 # Negativtest des Alarmwegs (quartalsweise)
@@ -77,6 +77,7 @@ Einziger Status-Ort ist die **Reifegrad-Tabelle** in `docs/RESTORE_MATRIX.md`
 - **Nextcloud:** Test am 2026-06-03 erfolgreich, aber mit Unraid-shfs-Eigenheit: Nextcloud fuehrt `chmod()` unter `/var/www/html` aus, was auf FUSE/shfs scheitert. Das Skript patcht `check_data_directory_permissions: false` und legt den `.ncdata`-Marker an.
 - **Authelia:** bewusst Config-Smoke ohne produktiven Dump-Restore (Storage-Encryption-Key-Kopplung).
 - **Immich:** Foto-Dateien-Restore ist bewusst nicht Teil des Smokes (separater DR-Drill); Test-Postgres nutzt das produktive VectorChord-Image.
+- **Home Assistant:** nutzt das neueste HA-native Backup-Artefakt und eine Kopie der Mosquitto-Appdata; Testcontainer laufen nur auf localhost-Ports, ohne Traefik/Public Route.
 - **Unraid-Flash / Tailscale:** noch ohne vollstaendigen Erstlauf - `unraid-flash-runbook.md`, `tailscale-runbook.md`; offene Schritte in `docs/MASTER_TODO.md`.
 
 ## Naechste Ausbaustufen
diff --git a/ops/restore-tests/homeassistant-compose.test.yml b/ops/restore-tests/homeassistant-compose.test.yml
new file mode 100644
index 0000000..731bc97
--- /dev/null
+++ b/ops/restore-tests/homeassistant-compose.test.yml
@@ -0,0 +1,29 @@
+services:
+  restoretest-ha-mosquitto:
+    image: eclipse-mosquitto:2.0.22@sha256:914f529386804c8278a4e581526b9be5e1604df44b30daabc70aa97dcefe5268
+    container_name: restoretest-ha-mosquitto
+    restart: "no"
+    volumes:
+      - ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/config/mosquitto.conf:/mosquitto/config/mosquitto.conf:ro
+      - ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/appdata/config:/mosquitto/external_config:ro
+      - ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/appdata/data:/mosquitto/data
+      - ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/mosquitto/appdata/log:/mosquitto/log
+    ports:
+      - "127.0.0.1:11883:1883"
+    security_opt:
+      - no-new-privileges:true
+
+  restoretest-homeassistant:
+    image: ghcr.io/home-assistant/home-assistant:2026.6.1@sha256:59aa8824955c9db491b75d2eebe42bd68494f80c2ec69ec0d66d9dae37d37514
+    container_name: restoretest-homeassistant
+    restart: "no"
+    depends_on:
+      - restoretest-ha-mosquitto
+    environment:
+      TZ: Europe/Berlin
+    volumes:
+      - ${RESTORE_ROOT:-/mnt/user/backups/restore-lab/homeassistant}/homeassistant/config:/config
+    ports:
+      - "127.0.0.1:18123:8123"
+    security_opt:
+      - no-new-privileges:true
diff --git a/ops/restore-tests/homeassistant-restore-test.sh b/ops/restore-tests/homeassistant-restore-test.sh
new file mode 100755
index 0000000..f22f60a
--- /dev/null
+++ b/ops/restore-tests/homeassistant-restore-test.sh
@@ -0,0 +1,236 @@
+#!/bin/bash
+set -euo pipefail
+
+# Home Assistant + Mosquitto Restore Smoke Test
+#
+# Scope:
+# - Restore from the newest HA-native backup artifact
+# - Copy of the Mosquitto appdata into an isolated restore lab
+# - Copy of the domain repo ("Fachrepo") clone for a readability / git-status check
+# - Start of isolated test containers on localhost ports, without Traefik/public route
+# - HA HTTP/API smoke and MQTT publish/subscribe + retained topic after restart
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+. "$SCRIPT_DIR/common.sh"
+
+WHATIF=0
+KEEP_DATA=0
+for arg in "$@"; do
+  case "$arg" in
+    --what-if) WHATIF=1 ;;
+    --keep-data) KEEP_DATA=1 ;;
+    *) echo "Unknown argument: $arg" >&2; exit 1 ;;
+  esac
+done
+
+RESTORE_ROOT="/mnt/user/backups/restore-lab/homeassistant"
+REPORT_ROOT="/mnt/user/backups/restore-reports"
+REPORT_FILE="$REPORT_ROOT/homeassistant-$(date +%F).md"
+COMPOSE_FILE="$SCRIPT_DIR/homeassistant-compose.test.yml"
+HA_BACKUP_DIR="/mnt/user/appdata/homeassistant/backups"
+MOSQUITTO_APPDATA="/mnt/user/appdata/mosquitto"
+MOSQUITTO_REPO_CONF="/mnt/user/services/homelab-infra/smart-home/mosquitto/config/mosquitto.conf"
+FACHREPO_SOURCE="/mnt/user/services/smart-home-kalli"
+HA_TOKEN_FILE="/mnt/user/appdata/secrets/ha_token_codex"
+
+if [ "$WHATIF" -eq 1 ]; then
+  cat <&2
+  exit 1
+fi
+
+rm -rf -- "${RESTORE_ROOT:?}" # :? aborts instead of expanding to an empty path
+mkdir -p \
+  "$RESTORE_ROOT/ha-backup" \
+  "$RESTORE_ROOT/homeassistant/config" \
+  "$RESTORE_ROOT/mosquitto/config" \
+  "$RESTORE_ROOT/mosquitto/appdata/config" \
+  "$RESTORE_ROOT/mosquitto/appdata/data" \
+  "$RESTORE_ROOT/mosquitto/appdata/log" \
+  "$RESTORE_ROOT/fachrepo"
+
+tar -xf "$latest_backup" -C "$RESTORE_ROOT/ha-backup"
+require_path "$RESTORE_ROOT/ha-backup/backup.json"
+require_path "$RESTORE_ROOT/ha-backup/homeassistant.tar.gz"
+tar -xzf "$RESTORE_ROOT/ha-backup/homeassistant.tar.gz" -C "$RESTORE_ROOT/homeassistant/config" --strip-components=1 data
+
+cp "$MOSQUITTO_REPO_CONF" "$RESTORE_ROOT/mosquitto/config/mosquitto.conf"
+cp -a "$MOSQUITTO_APPDATA/config/." "$RESTORE_ROOT/mosquitto/appdata/config/"
+cp -a "$MOSQUITTO_APPDATA/data/." "$RESTORE_ROOT/mosquitto/appdata/data/"
+if [ -d "$MOSQUITTO_APPDATA/log" ]; then
+  cp -a "$MOSQUITTO_APPDATA/log/." "$RESTORE_ROOT/mosquitto/appdata/log/" || true
+fi
+cp -a "$FACHREPO_SOURCE/." "$RESTORE_ROOT/fachrepo/"
+
+ha_config="$RESTORE_ROOT/homeassistant/config"
+require_path "$ha_config/configuration.yaml"
+require_path "$ha_config/secrets.yaml"
+require_path "$ha_config/trusted_proxies.yaml"
+require_path "$ha_config/.storage/onboarding"
+require_path "$ha_config/.storage/auth"
+
+fachrepo_head="$(git -C "$RESTORE_ROOT/fachrepo" log -1 --oneline)"
+fachrepo_status="$(git -C "$RESTORE_ROOT/fachrepo" status --short)"
+if [ -n "$fachrepo_status" ]; then
+  echo "Restored fachrepo clone is not clean:" >&2
+  echo "$fachrepo_status" >&2
+  exit 1
+fi
+
+backup_size="$(stat -c '%s' "$latest_backup")"
+ha_file_count="$(find "$ha_config" -type f | wc -l | tr -d ' ')"
+ha_bytes="$(du -sb "$ha_config" | awk '{print $1}')"
+mosquitto_data_bytes="$(du -sb "$RESTORE_ROOT/mosquitto/appdata" | awk '{print $1}')"
+
+RESTORE_ROOT="$RESTORE_ROOT" docker compose -f "$COMPOSE_FILE" down >/dev/null 2>&1 || true
+RESTORE_ROOT="$RESTORE_ROOT" docker compose -f "$COMPOSE_FILE" up -d >/dev/null
+
+mqtt_user="$(sed -n 's/^mqtt_username:[[:space:]]*//p' "$ha_config/secrets.yaml" | sed "s/^['\"]//;s/['\"]$//")"
+mqtt_pass="$(sed -n 's/^mqtt_password:[[:space:]]*//p' "$ha_config/secrets.yaml" | sed "s/^['\"]//;s/['\"]$//")"
+if [ -z "$mqtt_user" ] || [ -z "$mqtt_pass" ]; then
+  echo "Missing mqtt_username or mqtt_password in restored HA secrets.yaml" >&2
+  exit 1
+fi
+
+mqtt_topic="restoretest/homeassistant/smoke"
+mqtt_payload="ok-$(date +%s)"
+mqtt_out="$RESTORE_ROOT/mqtt-sub.out"
+rm -f "$mqtt_out"
+docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$mqtt_topic" \
+  restoretest-ha-mosquitto sh -lc \
+  'mosquitto_sub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -C 1 -W 10' \
+  > "$mqtt_out" &
+sub_pid=$!
+sleep 1
+docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$mqtt_topic" -e MQTT_PAYLOAD="$mqtt_payload" \
+  restoretest-ha-mosquitto sh -lc \
+  'mosquitto_pub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -m "$MQTT_PAYLOAD"'
+wait "$sub_pid" || true # a failed subscriber is reported by the payload check below, not by a silent set -e abort
+mqtt_result="$(cat "$mqtt_out")"
+if [ "$mqtt_result" != "$mqtt_payload" ]; then
+  echo "MQTT publish/subscribe smoke failed" >&2
+  exit 1
+fi
+
+retained_topic="restoretest/homeassistant/retained"
+retained_payload="retained-$(date +%s)"
+docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$retained_topic" -e MQTT_PAYLOAD="$retained_payload" \
+  restoretest-ha-mosquitto sh -lc \
+  'mosquitto_pub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -m "$MQTT_PAYLOAD" -r'
+docker restart restoretest-ha-mosquitto >/dev/null
+sleep 3
+retained_result="$(docker exec -e MQTT_USER="$mqtt_user" -e MQTT_PASS="$mqtt_pass" -e MQTT_TOPIC="$retained_topic" \
+  restoretest-ha-mosquitto sh -lc \
+  'mosquitto_sub -h 127.0.0.1 -p 1883 -u "$MQTT_USER" -P "$MQTT_PASS" -t "$MQTT_TOPIC" -C 1 -W 10' | tr -d '\r' || true)" # keep going so the check below prints the failure
+if [ "$retained_result" != "$retained_payload" ]; then
+  echo "MQTT retained smoke failed" >&2
+  exit 1
+fi
+
+ha_http_status=""
+ha_body="$RESTORE_ROOT/ha-http-body.html"
+for _ in $(seq 1 180); do
+  ha_http_status="$(curl -sS -o "$ha_body" -w '%{http_code}' http://127.0.0.1:18123/ || true)"
+  if [ "$ha_http_status" = "200" ] && grep -qi "Home Assistant" "$ha_body"; then
+    break
+  fi
+  sleep 1
+done
+if [ "$ha_http_status" != "200" ] || ! grep -qi "Home Assistant" "$ha_body"; then
+  echo "HA HTTP smoke failed, status=$ha_http_status" >&2
+  docker logs --tail 120 restoretest-homeassistant >&2 || true
+  exit 1
+fi
+
+ha_api_status="$(curl -sS -o "$RESTORE_ROOT/ha-api.json" -w '%{http_code}' \
+  -H "Authorization: Bearer $(cat "$HA_TOKEN_FILE")" \
+  -H 'Content-Type: application/json' \
+  http://127.0.0.1:18123/api/ || true)"
+if [ "$ha_api_status" != "200" ]; then
+  echo "HA API token smoke failed, status=$ha_api_status" >&2
+  exit 1
+fi
+
+RESTORE_ROOT="$RESTORE_ROOT" docker compose -f "$COMPOSE_FILE" exec -T restoretest-homeassistant \
+  python -m homeassistant --script check_config --config /config >/tmp/restoretest-ha-check-config.out
+
+write_report "$REPORT_FILE" < $REPORT_FILE"
diff --git a/ops/restore-tests/run-restore-checks.sh b/ops/restore-tests/run-restore-checks.sh
index 129419c..8434076 100755
--- a/ops/restore-tests/run-restore-checks.sh
+++ b/ops/restore-tests/run-restore-checks.sh
@@ -55,6 +55,12 @@ case "$MODE" in
     fi
     exec "$SCRIPT_DIR/redis-restore-test.sh"
     ;;
+  homeassistant)
+    if [ "$WHATIF" = "--what-if" ]; then
+      exec "$SCRIPT_DIR/homeassistant-restore-test.sh" --what-if
+    fi
+    exec "$SCRIPT_DIR/homeassistant-restore-test.sh"
+    ;;
   nextcloud)
     if [ "$WHATIF" = "--what-if" ]; then
       exec "$SCRIPT_DIR/nextcloud-restore-test.sh" --what-if
@@ -98,7 +104,7 @@ case "$MODE" in
     exec "$SCRIPT_DIR/shared-pg-cluster-restore-test.sh"
     ;;
   *)
-    echo "Usage: $0 {freshness|freshness-negative|vaultwarden|gitea|paperless|immich|authelia|adguard|redis|nextcloud|komodo-bootstrap|komodo-mongo-restore|traefik|mailarchiver|mealie|shared-pg-cluster} [--what-if]" >&2
+    echo "Usage: $0 {freshness|freshness-negative|vaultwarden|gitea|paperless|immich|authelia|adguard|redis|homeassistant|nextcloud|komodo-bootstrap|komodo-mongo-restore|traefik|mailarchiver|mealie|shared-pg-cluster} [--what-if]" >&2
     exit 1
     ;;
 esac