Harden posture/borg audit scripts (robustness + coverage)

Working-tree improvements to the audit scripts (authored locally, not by me;
reviewed for correctness and syntax-checked with `bash -n` before commit):

- compose-runtime-drift: prefer `docker compose config` for the expected image
  with a raw-parse fallback; raw parser now resolves YAML anchors (*alias) so
  anchor-based composes (e.g. dawarich) no longer mis-report drift.
- komodo-stack-hygiene: treat an unreachable Komodo API as critical and exit 3
  so the Healthchecks EXIT trap sends /fail (the monitor itself is down, not
  "all green"); git fetch before hash-drift compare; clearer "cannot compare"
  message; pin in-container km host to localhost:9120.
- cert-token-check: expand monitored cert domains to the full set incl.
  hc.kaleschke.info.
- gitea-bundle-mirror: skip empty repos without refs instead of failing.
- unraid-user-scripts.md: document SEND_NTFY/NTFY_TOPIC for the daily report.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-24 11:35:55 +02:00
parent 036eba99a8
commit ad9bb40b95
5 changed files with 145 additions and 14 deletions
+41 -11
View File
@@ -15,6 +15,8 @@ NTFY_TOPIC="${NTFY_TOPIC:-homelab-alerts}"
SEND_NTFY="${SEND_NTFY:-1}"
KOMODO_ENV_FILE="${KOMODO_ENV_FILE:-/mnt/user/appdata/secrets/codex_komodo_api.env}"
KOMODO_CONTAINER="${KOMODO_CONTAINER:-komodo-core}"
KOMODO_CLI_HOST_FOR_CONTAINER="${KOMODO_CLI_HOST_FOR_CONTAINER:-http://localhost:9120}"
FETCH_BEFORE_DIFF="${FETCH_BEFORE_DIFF:-1}"
# Comma-separated allowlist of stacks that are deliberately inline-managed.
# Source: memory/komodo-stack-inline-managed.md, CLAUDE.md.
@@ -32,12 +34,15 @@ TMP_DIR="${TMP_DIR:-/tmp/kallilab-komodo-stack-hygiene}"
mkdir -p "$TMP_DIR"
RESULTS_FILE="$TMP_DIR/results.$$"
STACKS_FILE="$TMP_DIR/stacks.$$.json"
API_ERROR_FILE="$TMP_DIR/komodo-api.$$.err"
API_UNREACHABLE=0
: > "$RESULTS_FILE"
: > "$API_ERROR_FILE"
# Healthchecks heartbeat (endpoint-agnostic; the capability URL is a secret and must never be committed to the repo)
HC_URL_FILE="${HC_URL_FILE:-/mnt/user/appdata/secrets/healthchecks_komodo_hygiene_url}"
hc_url=""; [ -r "$HC_URL_FILE" ] && hc_url="$(tr -d '[:space:]' < "$HC_URL_FILE")"
# Send a best-effort heartbeat ping to the Healthchecks URL held in $hc_url.
# Arguments: $1 - optional suffix appended to the URL (e.g. "/start", "/fail").
# Returns:   always 0; a missing URL or a failed curl call is ignored on purpose
#            so that the heartbeat can never change the script's own outcome.
hc_ping() {
  # Nothing to do when no heartbeat URL has been configured.
  if [ -z "$hc_url" ]; then
    return 0
  fi
  # Output and errors are discarded; "|| true" keeps the ping non-fatal.
  curl -fsS -m 10 --retry 3 "${hc_url}${1:-}" >/dev/null 2>&1 || true
}
trap 'hc_rc=$?; rm -f "$RESULTS_FILE" "$STACKS_FILE"; [ "$hc_rc" -le 2 ] && hc_ping "" || hc_ping "/fail"' EXIT
trap 'hc_rc=$?; rm -f "$RESULTS_FILE" "$STACKS_FILE" "$API_ERROR_FILE"; [ "$hc_rc" -le 2 ] && hc_ping "" || hc_ping "/fail"' EXIT
hc_ping "/start"
json_escape() {
@@ -73,11 +78,21 @@ is_expected_not_in_komodo() {
stack_files_changed() {
local name="$1" deployed="$2" latest="$3"
local dir
HASH_COMPARE_REASON=""
# Locate the stack's compose dir (case-insensitive, same as Mode 3).
dir="$(find "$REPO_ROOT" -type d -iname "$name" -not -path "*/.git/*" 2>/dev/null | head -1)"
[ -n "$dir" ] || return 0 # No dir -> can't tell, treat as drift to be safe
( cd "$REPO_ROOT" && git rev-parse --verify --quiet "$deployed" >/dev/null ) || return 0
( cd "$REPO_ROOT" && git rev-parse --verify --quiet "$latest" >/dev/null ) || return 0
if [ -z "$dir" ]; then
HASH_COMPARE_REASON="no compose directory found for stack"
return 1
fi
if ! ( cd "$REPO_ROOT" && git rev-parse --verify --quiet "$deployed" >/dev/null ); then
HASH_COMPARE_REASON="deployed_hash $deployed is not available in local repo"
return 1
fi
if ! ( cd "$REPO_ROOT" && git rev-parse --verify --quiet "$latest" >/dev/null ); then
HASH_COMPARE_REASON="latest_hash $latest is not available in local repo"
return 1
fi
local rel="${dir#$REPO_ROOT/}"
if ( cd "$REPO_ROOT" && git diff --quiet "$deployed".."$latest" -- "$rel" ); then
return 1 # no change
@@ -85,20 +100,29 @@ stack_files_changed() {
return 0 # real change
}
if [ "$FETCH_BEFORE_DIFF" = "1" ]; then
if ! ( cd "$REPO_ROOT" && git fetch --quiet origin >/dev/null 2>&1 ); then
add_result "warning" "repo-fetch" "Could not fetch origin before hash drift comparisons"
fi
fi
# Load the Komodo API credentials and fetch the stack list.
if [ ! -r "$KOMODO_ENV_FILE" ]; then
add_result "warning" "komodo-api" "Komodo env file not readable: $KOMODO_ENV_FILE"
API_UNREACHABLE=1
add_result "critical" "komodo-api" "Komodo env file not readable: $KOMODO_ENV_FILE"
else
set -a
# shellcheck disable=SC1090
. "$KOMODO_ENV_FILE"
set +a
if ! docker exec \
-e KOMODO_CLI_HOST \
-e "KOMODO_CLI_HOST=$KOMODO_CLI_HOST_FOR_CONTAINER" \
-e KOMODO_CLI_KEY \
-e KOMODO_CLI_SECRET \
"$KOMODO_CONTAINER" km list -a stacks -f json > "$STACKS_FILE" 2>/dev/null; then
add_result "warning" "komodo-api" "km list stacks failed (container=$KOMODO_CONTAINER)"
"$KOMODO_CONTAINER" km list -a stacks -f json > "$STACKS_FILE" 2>"$API_ERROR_FILE"; then
API_UNREACHABLE=1
api_error="$(tr '\n' ' ' < "$API_ERROR_FILE" | sed -E 's/[[:space:]]+/ /g' | cut -c 1-180)"
add_result "critical" "komodo-api" "km list stacks failed (container=$KOMODO_CONTAINER): ${api_error:-unknown error}"
: > "$STACKS_FILE"
fi
fi
@@ -150,9 +174,12 @@ if [ -s "$STACKS_FILE" ]; then
# without stack content changes nothing and is not real drift.
# "-" = unknown (e.g. gitea self-host edge case); do not count it as drift.
if [ "$deployed_hash" != "-" ] && [ "$latest_hash" != "-" ] \
&& [ "$deployed_hash" != "$latest_hash" ] \
&& stack_files_changed "$name" "$deployed_hash" "$latest_hash"; then
add_result "warning" "$name" "deployed_hash $deployed_hash != latest_hash $latest_hash (stack files changed)"
&& [ "$deployed_hash" != "$latest_hash" ]; then
if stack_files_changed "$name" "$deployed_hash" "$latest_hash"; then
add_result "warning" "$name" "deployed_hash $deployed_hash != latest_hash $latest_hash (stack files changed)"
elif [ -n "${HASH_COMPARE_REASON:-}" ]; then
add_result "warning" "$name" "deployed_hash $deployed_hash != latest_hash $latest_hash (cannot compare: $HASH_COMPARE_REASON)"
fi
fi
# Failure mode 5: stack is down.
@@ -237,6 +264,9 @@ if [ "$critical_count" -gt 0 ] || [ "$warning_count" -gt 0 ]; then
"Komodo stack hygiene: $critical_count critical, $warning_count warning" \
"See $OUTPUT_PATH" "$priority" || true
fi
# If Komodo could not be queried at all, the hygiene monitor itself is broken.
# Use rc=3 so the Healthchecks EXIT trap sends /fail instead of a green ping.
[ "$API_UNREACHABLE" -eq 1 ] && exit 3
[ "$critical_count" -gt 0 ] && exit 2
exit 1
fi