d933d3cee8
- Hash drift now requires actual file changes inside the stack's compose-dir between deployed_hash and latest_hash. Komodo's deployed_hash bumps only on redeploy while latest_hash tracks master HEAD, which produced six false-positive "Pending Update" warnings for stacks whose own files never changed. - Add EXPECTED_NOT_IN_KOMODO env (default: hermes-agent) for compose files intentionally not Komodo-managed (work-in-progress, build/dev compose). End-to-end run on host: 0 critical, 0 warnings. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
238 lines
9.2 KiB
Bash
238 lines
9.2 KiB
Bash
#!/usr/bin/env bash
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Komodo stack hygiene check.
#
# Verifies that every Komodo stack is cleanly configured against the Git
# repository, and that every compose file in the repository has a matching
# Komodo stack. Catches the class of failure that slipped through with
# `immich_new` (2026-06-12): stack RUNNING, but no repo / no account /
# project_missing.

# Runtime configuration. Every setting can be overridden from the environment;
# the text after ":-" is the default used when the variable is unset or empty.

# Repository root; defaults to two directory levels above this script.
REPO_ROOT="${REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
# Destination of the JSON report written at the end of the run.
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/komodo-stack-hygiene-last.json}"
# Notification helper, its topic, and the on/off switch ("1" = send).
NTFY_SCRIPT="${NTFY_SCRIPT:-$REPO_ROOT/ops/restore-tests/send-ntfy.sh}"
NTFY_TOPIC="${NTFY_TOPIC:-homelab-alerts}"
SEND_NTFY="${SEND_NTFY:-1}"
# Env file that is sourced for the Komodo CLI credentials, and the container
# in which the `km` CLI is executed.
KOMODO_ENV_FILE="${KOMODO_ENV_FILE:-/mnt/user/appdata/secrets/codex_komodo_api.env}"
KOMODO_CONTAINER="${KOMODO_CONTAINER:-komodo-core}"

# Comma-separated allowlist of stacks that are deliberately inline-managed.
# Source: memory/komodo-stack-inline-managed.md, CLAUDE.md.
INLINE_ALLOWLIST="${INLINE_ALLOWLIST:-komodo,grafana}"

# Compose files under paths matching this extended regex do NOT count as
# expected stacks (examples, archives, submodules). The dot in ".git" is
# escaped so that it only matches a literal dot.
COMPOSE_EXCLUDE_PATTERN="${COMPOSE_EXCLUDE_PATTERN:-/archive/|/examples/|/\.git/}"

# Compose directory names that are deliberately NOT run as a Komodo stack
# (work in progress, build/dev compose, manually deployed). Comma-separated.
EXPECTED_NOT_IN_KOMODO="${EXPECTED_NOT_IN_KOMODO:-hermes-agent}"

# Scratch space for the intermediate result list and the fetched stack list.
TMP_DIR="${TMP_DIR:-/tmp/kallilab-komodo-stack-hygiene}"
mkdir -p "$TMP_DIR"
# mktemp creates each file exclusively under an unpredictable name, so a file
# or symlink planted in advance under a guessable PID-based name cannot be
# reused. Both files start out empty.
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
STACKS_FILE="$(mktemp "$TMP_DIR/stacks.json.XXXXXX")"
# Remove the scratch files on every exit path.
trap 'rm -f -- "$RESULTS_FILE" "$STACKS_FILE"' EXIT

#######################################
# Escape text for embedding inside a double-quoted JSON string.
# Reads stdin, writes the escaped text to stdout. Handles backslash, double
# quote, tab and carriage return.
#######################################
json_escape() {
  # Literal control characters are spliced into the sed script because "\t"
  # and "\r" inside a sed regex are GNU extensions, not POSIX.
  local tab=$'\t' cr=$'\r'
  # Backslashes must be doubled first, otherwise the backslashes added by the
  # later rules would be escaped again.
  sed -e 's/\\/\\\\/g' \
    -e 's/"/\\"/g' \
    -e "s/${tab}/\\\\t/g" \
    -e "s/${cr}/\\\\r/g"
}

#######################################
# Append one check result to the results file as a tab-separated record.
# Globals:   RESULTS_FILE (appended to)
# Arguments: $1 - severity (the script uses "ok", "warning", "critical")
#            $2 - name of the stack or component the result refers to
#            $3 - human-readable message
#######################################
add_result() {
  local severity="$1" name="$2" message="$3"
  local tab=$'\t' nl=$'\n'
  # The results file holds one record per line with tab-separated columns.
  # A newline in any field would split the record, and a tab in one of the
  # first two fields would shift the columns, so both are replaced by spaces.
  # Tabs in the message are kept: it is the last column and is read back as
  # "the rest of the line".
  severity=${severity//"$tab"/ }
  severity=${severity//"$nl"/ }
  name=${name//"$tab"/ }
  name=${name//"$nl"/ }
  message=${message//"$nl"/ }
  printf '%s\t%s\t%s\n' "$severity" "$name" "$message" >> "$RESULTS_FILE"
}

#######################################
# Check whether a stack name is on the inline-managed allowlist.
# Globals:   INLINE_ALLOWLIST (read) - comma-separated list of names
# Arguments: $1 - stack name
# Returns:   0 if the name exactly equals one list entry, 1 otherwise
#######################################
is_inline_allowed() {
  local name="$1"
  local entry
  # Split on commas with `read -d` instead of an unquoted expansion: an entry
  # such as "*" is then compared literally rather than being glob-expanded
  # against the current directory. The `|| [ -n ... ]` part picks up the last
  # entry, which has no trailing comma.
  while IFS= read -r -d ',' entry || [ -n "$entry" ]; do
    [ "$name" = "$entry" ] && return 0
  done < <(printf '%s' "$INLINE_ALLOWLIST")
  return 1
}

#######################################
# Check whether a compose directory name is listed as deliberately not
# managed by Komodo.
# Globals:   EXPECTED_NOT_IN_KOMODO (read) - comma-separated list of names
# Arguments: $1 - compose directory name
# Returns:   0 if the name exactly equals one list entry, 1 otherwise
#######################################
is_expected_not_in_komodo() {
  local name="$1"
  local entry
  # Split on commas with `read -d` instead of an unquoted expansion: an entry
  # such as "*" is then compared literally rather than being glob-expanded
  # against the current directory. The `|| [ -n ... ]` part picks up the last
  # entry, which has no trailing comma.
  while IFS= read -r -d ',' entry || [ -n "$entry" ]; do
    [ "$name" = "$entry" ] && return 0
  done < <(printf '%s' "$EXPECTED_NOT_IN_KOMODO")
  return 1
}

#######################################
# True drift: do files inside this stack's compose dir actually differ
# between deployed_hash and latest_hash? Komodo's deployed_hash bumps only
# on redeploy, while latest_hash tracks master HEAD - that produces a noisy
# "Pending Update" even when the stack itself wasn't touched.
# Globals:   REPO_ROOT (read)
# Arguments: $1 - stack name (matched case-insensitively against dir names)
#            $2 - deployed commit hash
#            $3 - latest commit hash
# Returns:   0 if the stack dir changed, or if that cannot be determined
#            (no matching dir, hash unknown to the local repo, git error);
#            1 only when git confirms there is no change.
#######################################
stack_files_changed() {
  local name="$1" deployed="$2" latest="$3"
  local dir="" candidate rel

  # Several directories may share the stack's name. Prefer the one that holds
  # a compose file (the same criterion the Mode 3 check uses); fall back to
  # the first match if none does.
  while IFS= read -r candidate; do
    [ -n "$candidate" ] || continue
    [ -n "$dir" ] || dir="$candidate"
    if [ -f "$candidate/docker-compose.yml" ] \
      || [ -f "$candidate/docker-compose.yaml" ] \
      || [ -f "$candidate/compose.yml" ] \
      || [ -f "$candidate/compose.yaml" ]; then
      dir="$candidate"
      break
    fi
  done < <(find "$REPO_ROOT" -type d -iname "$name" -not -path "*/.git/*" 2>/dev/null)

  # No dir -> can't tell, treat as drift to be safe.
  [ -n "$dir" ] || return 0

  # A hash that does not resolve to a commit in the local clone cannot be
  # diffed -> treat as drift as well.
  git -C "$REPO_ROOT" rev-parse --verify --quiet "${deployed}^{commit}" >/dev/null || return 0
  git -C "$REPO_ROOT" rev-parse --verify --quiet "${latest}^{commit}" >/dev/null || return 0

  # Quote REPO_ROOT inside the pattern so glob characters in the path are
  # taken literally when stripping the prefix.
  rel="${dir#"$REPO_ROOT"/}"

  # `git diff --quiet` exits 0 only when the pathspec is unchanged.
  if git -C "$REPO_ROOT" diff --quiet "$deployed".."$latest" -- "$rel"; then
    return 1 # no change
  fi
  return 0 # real change (or git error)
}

# Load the Komodo API credentials and fetch the stack list into STACKS_FILE.
if [ ! -r "$KOMODO_ENV_FILE" ]; then
  add_result "warning" "komodo-api" "Komodo env file not readable: $KOMODO_ENV_FILE"
else
  # Auto-export everything the env file defines so that `docker exec -e NAME`
  # can forward the values into the container.
  set -a
  # shellcheck disable=SC1090
  . "$KOMODO_ENV_FILE"
  set +a
  if ! docker exec \
    -e KOMODO_CLI_HOST \
    -e KOMODO_CLI_KEY \
    -e KOMODO_CLI_SECRET \
    "$KOMODO_CONTAINER" km list -a stacks -f json > "$STACKS_FILE" 2>/dev/null; then
    add_result "warning" "komodo-api" "km list stacks failed (container=$KOMODO_CONTAINER)"
    : > "$STACKS_FILE"
  elif ! jq -e 'type == "array"' "$STACKS_FILE" >/dev/null 2>&1; then
    # km exited 0 but did not print a JSON array. Report it here and empty the
    # file; otherwise a later jq call on the file would fail and abort the
    # script under `set -e` before any report is written.
    add_result "warning" "komodo-api" "km list stacks returned no valid JSON array (container=$KOMODO_CONTAINER)"
    : > "$STACKS_FILE"
  fi
fi

# Per-stack checks. The fields are joined with "|" instead of a tab because
# tab is IFS whitespace and `read` would collapse empty fields. "|" does not
# occur in stack names, repos or hashes.
if [ -s "$STACKS_FILE" ]; then
  while IFS='|' read -r name repo project_missing missing_files state deployed_hash latest_hash files_on_host file_contents; do
    [ -n "$name" ] || continue

    # Set to 1 as soon as this stack produces a warning, so that no
    # contradictory "OK" record is written for it at the end.
    stack_warned=0

    if is_inline_allowed "$name"; then
      add_result "ok" "$name" "Inline-managed (allowlisted), skipping repo checks"
      continue
    fi

    # Failure mode 1: the stack has no Git source (the immich_new symptom).
    if [ "$repo" = "-" ] && [ "$files_on_host" != "True" ] && [ "$file_contents" != "True" ]; then
      add_result "critical" "$name" "Stack has no repo configured and is not inline-allowed"
      continue
    fi

    # Failure mode 2: Komodo reports the project as missing.
    if [ "$project_missing" = "True" ]; then
      add_result "critical" "$name" "project_missing=true (missing_files=$missing_files)"
      continue
    fi

    # Failure mode 3: the stack name matches no compose file in the repo.
    # Case-insensitive (the compose dir may differ in upper/lower case,
    # e.g. Adguard).
    match_found=""
    while IFS= read -r dir; do
      [ -n "$dir" ] || continue
      if [ -f "$dir/docker-compose.yml" ] \
        || [ -f "$dir/docker-compose.yaml" ] \
        || [ -f "$dir/compose.yml" ] \
        || [ -f "$dir/compose.yaml" ]; then
        match_found=1
        break
      fi
    done < <(find "$REPO_ROOT" -type d -iname "$name" -not -path "*/.git/*" 2>/dev/null)
    if [ -z "$match_found" ]; then
      # Orphaned stacks like the previously seen `immich_new`: Komodo knows
      # the stack, but the repo has no compose path for it.
      add_result "warning" "$name" "Stack name does not match any compose directory in repo"
      stack_warned=1
    fi

    # Failure mode 4: deployed hash lags behind latest AND the stack dir
    # really has file changes in between. A mere Komodo hash movement without
    # stack content changes nothing and is not real drift.
    # "-" = unknown (e.g. gitea self-host edge case), not counted as drift.
    if [ "$deployed_hash" != "-" ] && [ "$latest_hash" != "-" ] \
      && [ "$deployed_hash" != "$latest_hash" ] \
      && stack_files_changed "$name" "$deployed_hash" "$latest_hash"; then
      add_result "warning" "$name" "deployed_hash $deployed_hash != latest_hash $latest_hash (stack files changed)"
      stack_warned=1
    fi

    # Failure mode 5: the stack is down.
    if [ "$state" = "down" ] || [ "$state" = "unknown" ]; then
      add_result "warning" "$name" "Stack state is $state"
      stack_warned=1
    fi

    # Only a stack that passed every check gets the OK record.
    if [ "$stack_warned" -eq 0 ]; then
      add_result "ok" "$name" "Stack hygiene OK (state=$state, hash=$deployed_hash)"
    fi
  done < <(jq -r '
    # Map null, false and the empty string to "-". NOTE(review): the API may
    # report an unset repo as "" rather than null - confirm; treating both as
    # "not set" keeps failure mode 1 working either way.
    def or_dash: if (. // "") == "" then "-" else . end;
    .[] | [
      .name // "-",
      (.info.repo | or_dash),
      (.info.project_missing | if . then "True" else "False" end),
      (((.info.missing_files // []) | join(",")) | if . == "" then "-" else . end),
      (.info.state | or_dash),
      (.info.deployed_hash | or_dash),
      (.info.latest_hash | or_dash),
      (.info.files_on_host | if . then "True" else "False" end),
      (.info.file_contents | if . then "True" else "False" end)
    ] | join("|")' "$STACKS_FILE")
fi

# Failure mode 6: compose file in the repo, but no Komodo stack of that name.
if [ -s "$STACKS_FILE" ]; then
  # Test the jq call explicitly: as a bare assignment its failure would abort
  # the script under `set -e` before the report is written.
  if ! known_names="$(jq -r '.[].name' "$STACKS_FILE")"; then
    add_result "warning" "komodo-api" "Could not read stack names from Komodo stack list; repo compose check skipped"
  else
    while IFS= read -r -d '' compose; do
      # Quote REPO_ROOT inside the pattern so glob characters in the path are
      # taken literally when stripping the prefix.
      rel="${compose#"$REPO_ROOT"/}"
      # The relative path has no leading slash, so a pattern like "/archive/"
      # could never match a top-level "archive/..." path. Test the path both
      # as-is and with a leading slash; patterns that matched before still do.
      if grep -Eq -- "$COMPOSE_EXCLUDE_PATTERN" <<< "$rel" \
        || grep -Eq -- "$COMPOSE_EXCLUDE_PATTERN" <<< "/$rel"; then
        continue
      fi
      dir_name="$(basename "$(dirname "$compose")")"
      if is_inline_allowed "$dir_name"; then
        continue
      fi
      if is_expected_not_in_komodo "$dir_name"; then
        continue
      fi
      # Case-insensitive, because e.g. host-services/Adguard <-> Komodo stack
      # adguard legitimately counts as a match. "--" keeps a directory name
      # starting with "-" from being parsed as a grep option.
      if ! grep -Fixq -- "$dir_name" <<< "$known_names"; then
        add_result "warning" "$dir_name" "Compose file $rel has no matching Komodo stack"
      fi
    done < <(find "$REPO_ROOT" -path "$REPO_ROOT/.git" -prune -o -type f \
      \( -name docker-compose.yml -o -name docker-compose.yaml \
      -o -name compose.yml -o -name compose.yaml \) -print0)
  fi
fi

# Summarise the collected results and write the JSON report.
timestamp="$(date -Iseconds)"
critical_count="$(awk -F '\t' '$1 == "critical" { c++ } END { print c + 0 }' "$RESULTS_FILE")"
warning_count="$(awk -F '\t' '$1 == "warning" { c++ } END { print c + 0 }' "$RESULTS_FILE")"

# Overall status is the worst severity seen.
status="ok"
if [ "$critical_count" -gt 0 ]; then
  status="critical"
elif [ "$warning_count" -gt 0 ]; then
  status="warning"
fi

mkdir -p "$(dirname "$OUTPUT_PATH")"
tab=$'\t'
{
  printf '{\n'
  printf '  "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
  printf '  "status": "%s",\n' "$status"
  printf '  "critical_count": %s,\n' "$critical_count"
  printf '  "warning_count": %s,\n' "$warning_count"
  printf '  "checks": [\n'
  first=1
  # Split each record by hand. `IFS=$'\t' read` would treat tab as whitespace
  # and collapse empty fields, so a record with an empty name would have its
  # message shifted into the name column.
  while IFS= read -r line; do
    severity="${line%%"$tab"*}"
    rest="${line#*"$tab"}"
    name="${rest%%"$tab"*}"
    message="${rest#*"$tab"}"
    if [ "$first" -eq 0 ]; then printf ',\n'; fi
    first=0
    printf '    {"severity":"%s","name":"%s","message":"%s"}' \
      "$(printf '%s' "$severity" | json_escape)" \
      "$(printf '%s' "$name" | json_escape)" \
      "$(printf '%s' "$message" | json_escape)"
  done < "$RESULTS_FILE"
  printf '\n  ]\n}\n'
} > "$OUTPUT_PATH.tmp"
# Write to a temporary name first and rename, so a reader never sees a
# half-written report.
mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
cat "$OUTPUT_PATH"

# Notify and pick the exit status: 2 when there is at least one critical
# finding, 1 when there are only warnings. With no findings the script falls
# through and ends normally.
if [ "$critical_count" -gt 0 ] || [ "$warning_count" -gt 0 ]; then
  if [ "$SEND_NTFY" = "1" ] && [ -x "$NTFY_SCRIPT" ]; then
    if [ "$critical_count" -gt 0 ]; then
      priority="urgent"
    elif [ "$warning_count" -gt 0 ]; then
      priority="high"
    else
      priority="default"
    fi
    # Best effort: a failing notification must not change the exit status.
    "$NTFY_SCRIPT" "$NTFY_TOPIC" \
      "Komodo stack hygiene: $critical_count critical, $warning_count warning" \
      "See $OUTPUT_PATH" "$priority" || true
  fi
  if [ "$critical_count" -gt 0 ]; then
    exit 2
  fi
  exit 1
fi