From b387757e870b1fcd15a743b2ddde5b7f4e05d837 Mon Sep 17 00:00:00 2001
From: Micha
Date: Fri, 12 Jun 2026 12:51:07 +0200
Subject: [PATCH] ops: add komodo stack hygiene posture-check

Catches the failure class that let immich_new slip through: stacks
without a configured repo, project_missing, hash drift, and repo compose
files without a matching Komodo stack. Dry-run on host found 6 honest
warnings, 0 critical.

Wrapper as Unraid User Script for weekly cadence is tracked in
MASTER_TODO.

Co-Authored-By: Claude Opus 4.7
---
 docs/MASTER_TODO.md                           |   1 +
 .../posture-check/komodo-stack-hygiene.sh     | 229 ++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 services/posture-check/komodo-stack-hygiene.sh

diff --git a/docs/MASTER_TODO.md b/docs/MASTER_TODO.md
index 247d373..e3bc888 100644
--- a/docs/MASTER_TODO.md
+++ b/docs/MASTER_TODO.md
@@ -25,6 +25,7 @@ Host-Reports (`/mnt/user/backups/restore-reports/`) und in der Git-Historie.
 | Restore-Test Tailscale | Operator | State-Validierung + Reconnect nur auf Wegwerf-Host/VM, danach Geraet in Tailscale-Admin entfernen | `ops/restore-tests/tailscale-runbook.md` |
 | Authelia OIDC fuer Apps | Operator/Claude | Live: Grafana + Mealie (verifiziert), Paperless deployed (Login-Test offen). Immich + Nextcloud bewusst geparkt bis Family-Onboarding (siehe `docs/DECISIONS.md` 2026-06-06) | `docs/AUTHELIA_OIDC_PLAN.md` |
 | Audit-PDF aus `docs/` entfernen | Operator | `docs/KalliLab_CORE_Audit_2026-06-06.pdf` (untracked) extern ablegen (H:/ oder Documents-Share) und lokal loeschen; Binaerdateien gehoeren nicht ins GitOps-Repo | Doku-Regeln `docs/REPO_MAP.md` |
+| Komodo-Stack-Hygiene-Check aktivieren | Operator | Skript `services/posture-check/komodo-stack-hygiene.sh` ist gemerged und auf dem Host getestet. Verbleibt: Unraid User Script `komodo-stack-hygiene-weekly` anlegen, der `bash /mnt/user/services/homelab-infra/services/posture-check/komodo-stack-hygiene.sh` ausfuehrt. Findet u.a. die `immich_new`-Klasse (Stack ohne Repo, `project_missing`, Compose-File ohne Stack). | `services/posture-check/komodo-stack-hygiene.sh` |
 
 ---
 
diff --git a/services/posture-check/komodo-stack-hygiene.sh b/services/posture-check/komodo-stack-hygiene.sh
new file mode 100644
index 0000000..d45384b
--- /dev/null
+++ b/services/posture-check/komodo-stack-hygiene.sh
@@ -0,0 +1,229 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Komodo stack hygiene check.
+#
+# Verifies that every Komodo stack is cleanly configured against the Git repo
+# and that every compose file in the repo has a matching Komodo stack.
+# Catches the class of failure that `immich_new` (2026-06-12) slipped through
+# with: stack RUNNING, but no repo / no account / project_missing.
+#
+# The report is written as JSON to OUTPUT_PATH and echoed to stdout.
+# Exit status: 0 = no findings, 1 = warnings only, 2 = at least one critical.
+
+REPO_ROOT="${REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
+OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/komodo-stack-hygiene-last.json}"
+NTFY_SCRIPT="${NTFY_SCRIPT:-$REPO_ROOT/ops/restore-tests/send-ntfy.sh}"
+NTFY_TOPIC="${NTFY_TOPIC:-homelab-alerts}"
+SEND_NTFY="${SEND_NTFY:-1}"
+KOMODO_ENV_FILE="${KOMODO_ENV_FILE:-/mnt/user/appdata/secrets/codex_komodo_api.env}"
+KOMODO_CONTAINER="${KOMODO_CONTAINER:-komodo-core}"
+
+# Comma-separated allowlist of deliberately inline-managed stacks.
+# Source: memory/komodo-stack-inline-managed.md, CLAUDE.md.
+INLINE_ALLOWLIST="${INLINE_ALLOWLIST:-komodo,grafana}"
+
+# Compose files below these paths do NOT count as expected stacks
+# (examples, archives, submodules). Extended regex; it is matched against the
+# repo-relative path prefixed with "/".
+COMPOSE_EXCLUDE_PATTERN="${COMPOSE_EXCLUDE_PATTERN:-/archive/|/examples/|/.git/}"
+
+TMP_DIR="${TMP_DIR:-/tmp/kallilab-komodo-stack-hygiene}"
+mkdir -p "$TMP_DIR"
+# mktemp instead of "$$"-based names: the default TMP_DIR lives in the shared
+# /tmp, where predictable file names are unsafe.
+RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
+STACKS_FILE="$(mktemp "$TMP_DIR/stacks.XXXXXX")"
+trap 'rm -f "$RESULTS_FILE" "$STACKS_FILE"' EXIT
+
+# Escapes stdin for use inside a JSON string (backslash, double quote, tab).
+json_escape() {
+  sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/\t/\\t/g'
+}
+
+# Appends one check result to RESULTS_FILE.
+# Arguments: $1 severity (ok|warning|critical), $2 check name, $3 message.
+add_result() {
+  printf '%s\t%s\t%s\n' "$1" "$2" "$3" >> "$RESULTS_FILE"
+}
+
+# Returns 0 if $1 exactly equals one entry of INLINE_ALLOWLIST, 1 otherwise.
+is_inline_allowed() {
+  local name="$1"
+  local entry
+  local IFS=,
+  for entry in $INLINE_ALLOWLIST; do
+    [ "$name" = "$entry" ] && return 0
+  done
+  return 1
+}
+
+# Load Komodo API credentials and fetch the stack list.
+if [ ! -r "$KOMODO_ENV_FILE" ]; then
+  add_result "warning" "komodo-api" "Komodo env file not readable: $KOMODO_ENV_FILE"
+else
+  set -a
+  # shellcheck disable=SC1090
+  . "$KOMODO_ENV_FILE"
+  set +a
+  if ! docker exec \
+      -e KOMODO_CLI_HOST \
+      -e KOMODO_CLI_KEY \
+      -e KOMODO_CLI_SECRET \
+      "$KOMODO_CONTAINER" km list -a stacks -f json > "$STACKS_FILE" 2>/dev/null; then
+    add_result "warning" "komodo-api" "km list stacks failed (container=$KOMODO_CONTAINER)"
+    : > "$STACKS_FILE"
+  elif ! jq -e 'type == "array"' "$STACKS_FILE" > /dev/null 2>&1; then
+    # Without this guard, unparseable output (or a missing jq) makes the jq
+    # calls below fail: the per-stack loop silently sees no stacks and the
+    # script then aborts under `set -e` without writing a report.
+    add_result "warning" "komodo-api" "km list stacks returned no usable JSON array"
+    : > "$STACKS_FILE"
+  fi
+fi
+
+# Per-stack checks. The separator is "|" rather than tab because IFS=tab
+# would collapse empty fields (tab is IFS whitespace). "|" does not occur in
+# stack names, repos or hashes.
+if [ -s "$STACKS_FILE" ]; then
+  while IFS='|' read -r name repo project_missing missing_files state deployed_hash latest_hash files_on_host file_contents; do
+    [ -n "$name" ] || continue
+
+    if is_inline_allowed "$name"; then
+      add_result "ok" "$name" "Inline-managed (allowlisted), skipping repo checks"
+      continue
+    fi
+
+    # Failure mode 1: stack has no Git source (the immich_new symptom).
+    if [ "$repo" = "-" ] && [ "$files_on_host" != "True" ] && [ "$file_contents" != "True" ]; then
+      add_result "critical" "$name" "Stack has no repo configured and is not inline-allowed"
+      continue
+    fi
+
+    # Failure mode 2: Komodo reports the project as missing.
+    if [ "$project_missing" = "True" ]; then
+      add_result "critical" "$name" "project_missing=true (missing_files=$missing_files)"
+      continue
+    fi
+
+    # Flipped to 1 by every warning below, so the closing "ok" entry is only
+    # recorded for stacks that passed all checks.
+    has_issue=0
+
+    # Failure mode 3: stack name matches no compose directory in the repo.
+    # Case-insensitive (the compose dir may differ in case, e.g. Adguard).
+    match_found=""
+    while IFS= read -r dir; do
+      [ -n "$dir" ] || continue
+      if [ -f "$dir/docker-compose.yml" ] \
+        || [ -f "$dir/docker-compose.yaml" ] \
+        || [ -f "$dir/compose.yml" ] \
+        || [ -f "$dir/compose.yaml" ]; then
+        match_found=1
+        break
+      fi
+    done < <(find "$REPO_ROOT" -type d -iname "$name" -not -path "*/.git/*" 2>/dev/null)
+    if [ -z "$match_found" ]; then
+      # Orphaned stacks such as the previously seen `immich_new`: Komodo
+      # knows the stack, but the repo has no compose path for it.
+      add_result "warning" "$name" "Stack name does not match any compose directory in repo"
+      has_issue=1
+    fi
+
+    # Failure mode 4: deployed hash lags behind latest.
+    # "-" = unknown (e.g. gitea self-host edge case); not counted as drift.
+    if [ "$deployed_hash" != "-" ] && [ "$latest_hash" != "-" ] && [ "$deployed_hash" != "$latest_hash" ]; then
+      add_result "warning" "$name" "deployed_hash $deployed_hash != latest_hash $latest_hash"
+      has_issue=1
+    fi
+
+    # Failure mode 5: stack is down.
+    if [ "$state" = "down" ] || [ "$state" = "unknown" ]; then
+      add_result "warning" "$name" "Stack state is $state"
+      has_issue=1
+    fi
+
+    if [ "$has_issue" -eq 0 ]; then
+      add_result "ok" "$name" "Stack hygiene OK (state=$state, hash=$deployed_hash)"
+    fi
+  done < <(jq -r '
+    # Map null and "" to "-" so the shell tests a single "unset" marker.
+    def dash: if . == null or . == "" then "-" else . end;
+    .[] | [
+      (.name // "-"),
+      (.info.repo | dash),
+      (.info.project_missing | if . then "True" else "False" end),
+      ((.info.missing_files // []) | join(",") | dash),
+      (.info.state // "-"),
+      (.info.deployed_hash | dash),
+      (.info.latest_hash | dash),
+      (.info.files_on_host | if . then "True" else "False" end),
+      (.info.file_contents | if . then "True" else "False" end)
+    ] | join("|")' "$STACKS_FILE")
+fi
+
+# Failure mode 6: compose file in the repo, but no Komodo stack of that name.
+if [ -s "$STACKS_FILE" ]; then
+  known_names="$(jq -r '.[].name' "$STACKS_FILE")"
+  while IFS= read -r -d '' compose; do
+    rel="${compose#"$REPO_ROOT"/}"
+    # Match against "/$rel": the default patterns start with "/", so without
+    # the prefix a top-level archive/ or examples/ dir would never be excluded.
+    if grep -Eq -- "$COMPOSE_EXCLUDE_PATTERN" <<< "/$rel"; then
+      continue
+    fi
+    dir_name="$(basename "$(dirname "$compose")")"
+    if is_inline_allowed "$dir_name"; then
+      continue
+    fi
+    # Case-insensitive, because e.g. host-services/Adguard <-> Komodo stack
+    # adguard legitimately counts as a match.
+    if ! grep -Fixq -- "$dir_name" <<< "$known_names"; then
+      add_result "warning" "$dir_name" "Compose file $rel has no matching Komodo stack"
+    fi
+  done < <(find "$REPO_ROOT" -path "$REPO_ROOT/.git" -prune -o -type f \
+    \( -name docker-compose.yml -o -name docker-compose.yaml \
+    -o -name compose.yml -o -name compose.yaml \) -print0)
+fi
+
+timestamp="$(date -Iseconds)"
+critical_count="$(awk -F '\t' '$1 == "critical" { c++ } END { print c + 0 }' "$RESULTS_FILE")"
+warning_count="$(awk -F '\t' '$1 == "warning" { c++ } END { print c + 0 }' "$RESULTS_FILE")"
+status="ok"
+[ "$warning_count" -gt 0 ] && status="warning"
+[ "$critical_count" -gt 0 ] && status="critical"
+
+mkdir -p "$(dirname "$OUTPUT_PATH")"
+{
+  printf '{\n'
+  printf '  "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
+  printf '  "status": "%s",\n' "$status"
+  printf '  "critical_count": %s,\n' "$critical_count"
+  printf '  "warning_count": %s,\n' "$warning_count"
+  printf '  "checks": [\n'
+  first=1
+  while IFS=$'\t' read -r severity name message; do
+    if [ "$first" -eq 0 ]; then printf ',\n'; fi
+    first=0
+    printf '    {"severity":"%s","name":"%s","message":"%s"}' \
+      "$(printf '%s' "$severity" | json_escape)" \
+      "$(printf '%s' "$name" | json_escape)" \
+      "$(printf '%s' "$message" | json_escape)"
+  done < "$RESULTS_FILE"
+  printf '\n  ]\n}\n'
+} > "$OUTPUT_PATH.tmp"
+mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
+cat "$OUTPUT_PATH"
+
+if [ "$critical_count" -gt 0 ] || [ "$warning_count" -gt 0 ]; then
+  if [ "$SEND_NTFY" = "1" ] && [ -x "$NTFY_SCRIPT" ]; then
+    priority="default"
+    [ "$warning_count" -gt 0 ] && priority="high"
+    [ "$critical_count" -gt 0 ] && priority="urgent"
+    "$NTFY_SCRIPT" "$NTFY_TOPIC" \
+      "Komodo stack hygiene: $critical_count critical, $warning_count warning" \
+      "See $OUTPUT_PATH" "$priority" || true
+  fi
+  [ "$critical_count" -gt 0 ] && exit 2
+  exit 1
+fi