ops: add komodo stack hygiene posture-check

Catches the failure class that let immich_new slip through: stacks
without a configured repo, project_missing, hash drift, and repo
compose files without a matching Komodo stack. Dry-run on host found
6 honest warnings, 0 critical. Wrapper as Unraid User Script for
weekly cadence is tracked in MASTER_TODO.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-12 12:51:07 +02:00
parent 3eedbcbe16
commit b387757e87
2 changed files with 199 additions and 0 deletions
@@ -0,0 +1,198 @@
#!/usr/bin/env bash
set -euo pipefail
# Komodo-Stack-Hygiene-Check.
#
# Prueft, dass jeder Komodo-Stack sauber gegen das Git-Repo konfiguriert ist,
# und dass jeder Compose-File im Repo einen passenden Komodo-Stack hat.
# Findet die Klasse von Fehlern, die `immich_new` (2026-06-12) durchgelassen
# hat: Stack RUNNING, aber kein Repo / kein Account / project_missing.
REPO_ROOT="${REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/komodo-stack-hygiene-last.json}"
NTFY_SCRIPT="${NTFY_SCRIPT:-$REPO_ROOT/ops/restore-tests/send-ntfy.sh}"
NTFY_TOPIC="${NTFY_TOPIC:-homelab-alerts}"
SEND_NTFY="${SEND_NTFY:-1}"
KOMODO_ENV_FILE="${KOMODO_ENV_FILE:-/mnt/user/appdata/secrets/codex_komodo_api.env}"
KOMODO_CONTAINER="${KOMODO_CONTAINER:-komodo-core}"
# Komma-separierte Allowlist fuer bewusst inline-managed Stacks.
# Quelle: memory/komodo-stack-inline-managed.md, CLAUDE.md.
INLINE_ALLOWLIST="${INLINE_ALLOWLIST:-komodo,grafana}"
# Compose-Files unter diesen Pfaden zaehlen NICHT als erwartete Stacks
# (Beispiele, Archive, Submodule).
COMPOSE_EXCLUDE_PATTERN="${COMPOSE_EXCLUDE_PATTERN:-/archive/|/examples/|/.git/}"
TMP_DIR="${TMP_DIR:-/tmp/kallilab-komodo-stack-hygiene}"
mkdir -p "$TMP_DIR"
RESULTS_FILE="$TMP_DIR/results.$$"
STACKS_FILE="$TMP_DIR/stacks.$$.json"
: > "$RESULTS_FILE"
trap 'rm -f "$RESULTS_FILE" "$STACKS_FILE"' EXIT
json_escape() {
sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/\t/\\t/g'
}
add_result() {
printf '%s\t%s\t%s\n' "$1" "$2" "$3" >> "$RESULTS_FILE"
}
is_inline_allowed() {
local name="$1"
local IFS=,
for entry in $INLINE_ALLOWLIST; do
[ "$name" = "$entry" ] && return 0
done
return 1
}
# Komodo-API-Credentials laden und Stack-Liste holen.
if [ ! -r "$KOMODO_ENV_FILE" ]; then
add_result "warning" "komodo-api" "Komodo env file not readable: $KOMODO_ENV_FILE"
else
set -a
# shellcheck disable=SC1090
. "$KOMODO_ENV_FILE"
set +a
if ! docker exec \
-e KOMODO_CLI_HOST \
-e KOMODO_CLI_KEY \
-e KOMODO_CLI_SECRET \
"$KOMODO_CONTAINER" km list -a stacks -f json > "$STACKS_FILE" 2>/dev/null; then
add_result "warning" "komodo-api" "km list stacks failed (container=$KOMODO_CONTAINER)"
: > "$STACKS_FILE"
fi
fi
# Per-Stack-Checks. Trenner: "|" statt Tab, weil IFS=Tab leere Felder kollabiert
# (Tab ist Whitespace in IFS). "|" kommt in Stack-Namen/Repos/Hashes nicht vor.
if [ -s "$STACKS_FILE" ]; then
while IFS='|' read -r name repo project_missing missing_files state deployed_hash latest_hash files_on_host file_contents; do
[ -n "$name" ] || continue
if is_inline_allowed "$name"; then
add_result "ok" "$name" "Inline-managed (allowlisted), skipping repo checks"
continue
fi
# Failure-Mode 1: Stack hat keine Git-Quelle (immich_new-Symptom).
if [ "$repo" = "-" ] && [ "$files_on_host" != "True" ] && [ "$file_contents" != "True" ]; then
add_result "critical" "$name" "Stack has no repo configured and is not inline-allowed"
continue
fi
# Failure-Mode 2: Komodo meldet Project Missing.
if [ "$project_missing" = "True" ]; then
add_result "critical" "$name" "project_missing=true (missing_files=$missing_files)"
continue
fi
# Failure-Mode 3: Stack-Name passt zu keinem Compose-File im Repo.
# Case-insensitive (Compose-Dir kann GroSs/klein abweichen, z.B. Adguard).
match_found=""
while IFS= read -r dir; do
[ -n "$dir" ] || continue
if [ -f "$dir/docker-compose.yml" ] \
|| [ -f "$dir/docker-compose.yaml" ] \
|| [ -f "$dir/compose.yml" ] \
|| [ -f "$dir/compose.yaml" ]; then
match_found=1
break
fi
done < <(find "$REPO_ROOT" -type d -iname "$name" -not -path "*/.git/*" 2>/dev/null)
if [ -z "$match_found" ]; then
# Verwaiste Stacks wie das frueher gesehene `immich_new`: Komodo kennt
# ihn, aber im Repo gibt's keinen Compose-Pfad.
add_result "warning" "$name" "Stack name does not match any compose directory in repo"
fi
# Failure-Mode 4: Deployed-Hash hinkt latest hinterher.
# "-" = unbekannt (z.B. gitea self-host edge case), nicht als Drift werten.
if [ "$deployed_hash" != "-" ] && [ "$latest_hash" != "-" ] && [ "$deployed_hash" != "$latest_hash" ]; then
add_result "warning" "$name" "deployed_hash $deployed_hash != latest_hash $latest_hash"
fi
# Failure-Mode 5: Stack ist down.
if [ "$state" = "down" ] || [ "$state" = "unknown" ]; then
add_result "warning" "$name" "Stack state is $state"
fi
add_result "ok" "$name" "Stack hygiene OK (state=$state, hash=$deployed_hash)"
done < <(jq -r '.[] | [
.name // "-",
(.info.repo // "-"),
(.info.project_missing | if . then "True" else "False" end),
(((.info.missing_files // []) | join(",")) | if . == "" then "-" else . end),
(.info.state // "-"),
(.info.deployed_hash // "-"),
(.info.latest_hash // "-"),
(.info.files_on_host | if . then "True" else "False" end),
(.info.file_contents | if . then "True" else "False" end)
] | join("|")' "$STACKS_FILE")
fi
# Failure-Mode 6: Compose-File im Repo, aber kein Komodo-Stack mit gleichem Namen.
if [ -s "$STACKS_FILE" ]; then
known_names="$(jq -r '.[].name' "$STACKS_FILE")"
while IFS= read -r -d '' compose; do
rel="${compose#$REPO_ROOT/}"
if printf '%s' "$rel" | grep -Eq "$COMPOSE_EXCLUDE_PATTERN"; then
continue
fi
dir_name="$(basename "$(dirname "$compose")")"
if is_inline_allowed "$dir_name"; then
continue
fi
# Case-insensitive, weil z.B. host-services/Adguard <-> Komodo-Stack adguard
# legitim als gematched gilt.
if ! printf '%s\n' "$known_names" | grep -Fixq "$dir_name"; then
add_result "warning" "$dir_name" "Compose file $rel has no matching Komodo stack"
fi
done < <(find "$REPO_ROOT" -path "$REPO_ROOT/.git" -prune -o -type f \
\( -name docker-compose.yml -o -name docker-compose.yaml \
-o -name compose.yml -o -name compose.yaml \) -print0)
fi
timestamp="$(date -Iseconds)"
critical_count="$(awk -F '\t' '$1 == "critical" { c++ } END { print c + 0 }' "$RESULTS_FILE")"
warning_count="$(awk -F '\t' '$1 == "warning" { c++ } END { print c + 0 }' "$RESULTS_FILE")"
status="ok"
[ "$warning_count" -gt 0 ] && status="warning"
[ "$critical_count" -gt 0 ] && status="critical"
mkdir -p "$(dirname "$OUTPUT_PATH")"
{
printf '{\n'
printf ' "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
printf ' "status": "%s",\n' "$status"
printf ' "critical_count": %s,\n' "$critical_count"
printf ' "warning_count": %s,\n' "$warning_count"
printf ' "checks": [\n'
first=1
while IFS=$'\t' read -r severity name message; do
if [ "$first" -eq 0 ]; then printf ',\n'; fi
first=0
printf ' {"severity":"%s","name":"%s","message":"%s"}' \
"$(printf '%s' "$severity" | json_escape)" \
"$(printf '%s' "$name" | json_escape)" \
"$(printf '%s' "$message" | json_escape)"
done < "$RESULTS_FILE"
printf '\n ]\n}\n'
} > "$OUTPUT_PATH.tmp"
mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
cat "$OUTPUT_PATH"
if [ "$critical_count" -gt 0 ] || [ "$warning_count" -gt 0 ]; then
if [ "$SEND_NTFY" = "1" ] && [ -x "$NTFY_SCRIPT" ]; then
priority="default"
[ "$warning_count" -gt 0 ] && priority="high"
[ "$critical_count" -gt 0 ] && priority="urgent"
"$NTFY_SCRIPT" "$NTFY_TOPIC" \
"Komodo stack hygiene: $critical_count critical, $warning_count warning" \
"See $OUTPUT_PATH" "$priority" || true
fi
[ "$critical_count" -gt 0 ] && exit 2
exit 1
fi