report: unhealthy-Container namentlich + Image-Age-Allowlist
Zwei Verbesserungen am Daily Operations Report, ausgeloest durch den versteckten immich_machine_learning-Ausfall (lief 2,3 Tage unhealthy, weil der Report nur "unhealthy=1" zaehlte, ohne Name/Grund): 1. collect_container_state: neue Sektion "Unhealthy Container" listet jeden unhealthy Container mit FailingStreak und letztem Healthcheck-Output. So ist sofort sichtbar WELCHER Container und WARUM. 2. collect_image_freshness: neue Image-Age-Allowlist (image-age-allow.patterns). Bewusst gepinnte, aber aktuelle/empfohlene Images (immich_postgres = exakt Immichs Pin; blackbox-exporter v0.28.0 = latest) werden mit Recheck-Datum von der Ueberalterungs-Warnung ausgenommen. Nach Ablauf des Recheck-Datums greift die Ausnahme nicht mehr -> erzwingt Neubewertung statt stillen Alterns. Top-10-Tabelle hat jetzt eine Hinweis-Spalte (ueberaltert / bewusst gepinnt). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ SINCE="${SINCE:-24h}"
|
||||
MAX_LOG_LINES="${MAX_LOG_LINES:-80}"
|
||||
CERT_MAX_ROWS="${CERT_MAX_ROWS:-12}"
|
||||
IMAGE_AGE_WARN_DAYS="${IMAGE_AGE_WARN_DAYS:-180}"
|
||||
IMAGE_AGE_ALLOW_FILE="${IMAGE_AGE_ALLOW_FILE:-/mnt/user/services/homelab-infra/services/posture-check/image-age-allow.patterns}"
|
||||
LOG_VOLUME_TOP_N="${LOG_VOLUME_TOP_N:-10}"
|
||||
DISK_USAGE_WARN_PCT="${DISK_USAGE_WARN_PCT:-85}"
|
||||
CERT_WARN_DAYS="${CERT_WARN_DAYS:-21}"
|
||||
@@ -581,13 +582,36 @@ collect_image_freshness() {
|
||||
|
||||
local image_file="$TMP_DIR/images.tsv"
|
||||
local image_warnings=0
|
||||
local image_allowed=0
|
||||
local now_epoch
|
||||
: > "$image_file"
|
||||
now_epoch="$(date +%s)"
|
||||
|
||||
# Parse the image-age allowlist: container deliberately pinned to a stable or
|
||||
# upstream-recommended image. Each entry carries a recheck date; once that
|
||||
# date arrives (local midnight) the suppression lapses, so a pin gets re-reviewed instead
|
||||
# of silently aging forever.
|
||||
local allow_file="$TMP_DIR/image-allow.tsv"
|
||||
: > "$allow_file"
|
||||
if [ -f "$IMAGE_AGE_ALLOW_FILE" ]; then
|
||||
while IFS= read -r line; do
|
||||
line="${line%%#*}"
|
||||
line="$(printf '%s' "$line" | sed -E 's/^[[:space:]]+//; s/[[:space:]]+$//')"
|
||||
[ -n "$line" ] || continue
|
||||
local a_name a_date a_epoch
|
||||
a_name="$(printf '%s' "$line" | awk '{ print $1 }')"
|
||||
a_date="$(printf '%s' "$line" | awk '{ print $2 }')"
|
||||
[ -n "$a_name" ] && [ -n "$a_date" ] || continue
|
||||
a_epoch="$(date -d "$a_date" +%s 2>/dev/null || echo 0)"
|
||||
if [ "$a_epoch" -ge "$now_epoch" ]; then
|
||||
printf '%s\t%s\n' "$a_name" "$a_date" >> "$allow_file"
|
||||
fi
|
||||
done < "$IMAGE_AGE_ALLOW_FILE"
|
||||
fi
|
||||
|
||||
while IFS= read -r name; do
|
||||
[ -n "$name" ] || continue
|
||||
local image_id created_iso created_epoch age_days image_tag
|
||||
local image_id created_iso created_epoch age_days image_tag note recheck
|
||||
image_id="$(docker inspect --format '{{.Image}}' "$name" 2>/dev/null || true)"
|
||||
[ -n "$image_id" ] || continue
|
||||
created_iso="$(docker image inspect --format '{{.Created}}' "$image_id" 2>/dev/null || true)"
|
||||
@@ -596,33 +620,46 @@ collect_image_freshness() {
|
||||
created_epoch="$(date -d "$created_iso" +%s 2>/dev/null || echo 0)"
|
||||
[ "$created_epoch" -gt 0 ] || continue
|
||||
age_days=$(( (now_epoch - created_epoch) / 86400 ))
|
||||
printf '%d\t%s\t%s\n' "$age_days" "$name" "$image_tag" >> "$image_file"
|
||||
note=""
|
||||
if [ "$age_days" -ge "$IMAGE_AGE_WARN_DAYS" ]; then
|
||||
image_warnings=$((image_warnings + 1))
|
||||
recheck="$(awk -F '\t' -v n="$name" '$1 == n { print $2; found = 1 } END { exit !found }' "$allow_file" || true)"
|
||||
if [ -n "$recheck" ]; then
|
||||
note="bewusst gepinnt (recheck $recheck)"
|
||||
image_allowed=$((image_allowed + 1))
|
||||
else
|
||||
note="ueberaltert"
|
||||
image_warnings=$((image_warnings + 1))
|
||||
fi
|
||||
fi
|
||||
printf '%d\t%s\t%s\t%s\n' "$age_days" "$name" "$image_tag" "$note" >> "$image_file"
|
||||
done < <(docker ps --format '{{.Names}}')
|
||||
|
||||
set_summary "image_warnings" "$image_warnings"
|
||||
set_summary "image_allowed" "$image_allowed"
|
||||
|
||||
if [ ! -s "$image_file" ]; then
|
||||
append "- Keine Image-Daten verfuegbar."
|
||||
record_section_error "images" "Keine Image-Daten ermittelt"
|
||||
else
|
||||
append "- Schwelle Warnung: Image aelter als $IMAGE_AGE_WARN_DAYS Tage"
|
||||
append "- Container mit Image >= $IMAGE_AGE_WARN_DAYS Tage: $image_warnings"
|
||||
append "- Container mit ueberaltertem Image (gewarnt): $image_warnings"
|
||||
append "- Davon bewusst gepinnt (von Warnung ausgenommen): $image_allowed"
|
||||
append "- Allowlist-Quelle: \`$IMAGE_AGE_ALLOW_FILE\`"
|
||||
append ""
|
||||
append "### Aelteste Images (Top 10)"
|
||||
append ""
|
||||
append "| Alter Tage | Container | Image |"
|
||||
append "|---:|---|---|"
|
||||
sort -nr "$image_file" | head -n 10 | while IFS="$(printf '\t')" read -r age name img; do
|
||||
append "| $age | $name | $img |"
|
||||
append "| Alter Tage | Container | Image | Hinweis |"
|
||||
append "|---:|---|---|---|"
|
||||
sort -nr "$image_file" | head -n 10 | while IFS="$(printf '\t')" read -r age name img note; do
|
||||
append "| $age | $name | $img | ${note:-} |"
|
||||
done
|
||||
append ""
|
||||
if [ "$image_warnings" -eq 0 ]; then
|
||||
if [ "$image_warnings" -eq 0 ] && [ "$image_allowed" -eq 0 ]; then
|
||||
append "Bewertung: Keine Container mit ueberalterten Images. CVE-Hygiene aus dieser Sicht ok."
|
||||
elif [ "$image_warnings" -eq 0 ]; then
|
||||
append "Bewertung: Keine ungeprueft ueberalterten Images. $image_allowed Container sind bewusst gepinnt und mit Recheck-Datum dokumentiert."
|
||||
else
|
||||
append "Bewertung: $image_warnings Container nutzen Images aelter als $IMAGE_AGE_WARN_DAYS Tage. Update-Pipeline und CVE-Status pruefen."
|
||||
append "Bewertung: $image_warnings Container nutzen ueberalterte Images (nicht in der Allowlist). Update-Pipeline und CVE-Status pruefen."
|
||||
fi
|
||||
fi
|
||||
append ""
|
||||
@@ -663,6 +700,31 @@ collect_container_events() {
|
||||
collect_container_state() {
|
||||
append "## Container-Zustand"
|
||||
append ""
|
||||
|
||||
append "### Unhealthy Container"
|
||||
local unhealthy_file="$TMP_DIR/unhealthy.log"
|
||||
docker ps --filter health=unhealthy --format '{{.Names}}' > "$unhealthy_file"
|
||||
if [ ! -s "$unhealthy_file" ]; then
|
||||
append "- Keine."
|
||||
else
|
||||
append "| Container | FailingStreak | Letzter Healthcheck |"
|
||||
append "|---|---:|---|"
|
||||
while IFS= read -r name; do
|
||||
[ -n "$name" ] || continue
|
||||
local streak hc
|
||||
streak="$(docker inspect "$name" --format '{{.State.Health.FailingStreak}}' 2>/dev/null || echo '?')"
|
||||
# Letzten nicht-leeren Health-Log-Eintrag holen, einzeilig machen und
|
||||
# Pipe-Zeichen escapen, damit die Markdown-Tabelle nicht bricht.
|
||||
hc="$(docker inspect "$name" --format '{{range .State.Health.Log}}exit={{.ExitCode}} out={{.Output}}~~~{{end}}' 2>/dev/null \
|
||||
| tr '\n' ' ' \
|
||||
| awk -F '~~~' '{ for (i = NF - 1; i >= 1; i--) { if ($i != "") { print $i; break } } }' \
|
||||
| sed -E 's/[[:space:]]+/ /g; s/\|/\\|/g' \
|
||||
| cut -c1-160)"
|
||||
append "| \`$name\` | ${streak:-?} | ${hc:-(kein Output)} |"
|
||||
done < "$unhealthy_file"
|
||||
fi
|
||||
append ""
|
||||
|
||||
append "### Nicht laufende Container"
|
||||
local stopped_file="$TMP_DIR/stopped.log"
|
||||
docker ps -a --filter status=exited --filter status=dead --filter status=created --format '{{.Names}}\t{{.Status}}' > "$stopped_file"
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
# image-age-allow.patterns - Daily Operations Report
|
||||
#
|
||||
# Container, die bewusst auf einem aelteren, aber aktuellen/empfohlenen Image
|
||||
# gepinnt sind, sollen nicht jeden Tag als "Image ueberaltert" warnen.
|
||||
#
|
||||
# Format pro Zeile:
|
||||
# <container-name> <YYYY-MM-DD recheck> # Begruendung
|
||||
#
|
||||
# - Spalte 1: exakter Container-Name (docker ps {{.Names}}).
|
||||
# - Spalte 2: Recheck-Datum. AB diesem Datum (00:00 Ortszeit) greift die Ausnahme NICHT
|
||||
# mehr und der Container taucht wieder als Warnung auf -> erzwingt eine
|
||||
# menschliche Neubewertung statt stillen Alterns.
|
||||
# - Alles nach '#' ist Kommentar. Leerzeilen werden ignoriert.
|
||||
#
|
||||
# Eine Ausnahme heisst NICHT "Image egal", sondern "am Datum X erneut pruefen,
|
||||
# ob es noch die empfohlene/aktuelle Version ist".
|
||||
#
|
||||
# Last reviewed: 2026-06-10
|
||||
|
||||
# immich_postgres: exakt das von Immich offiziell empfohlene, per Digest
|
||||
# gepinnte DB-Image (14-vectorchord0.4.3-pgvectors0.2.0). Immichs eigene
|
||||
# docker-compose auf main pinnt am 2026-06-10 denselben Tag inkl. identischem
|
||||
# Digest. Kein Update, solange Immich nichts Neueres empfiehlt.
|
||||
# Re-check: ob Immich ein neueres Postgres-Image empfiehlt.
|
||||
immich_postgres 2026-09-10
|
||||
|
||||
# monitoring-blackbox-exporter: v0.28.0 ist am 2026-06-10 die NEUESTE Release
|
||||
# (Dez 2025). Das Image-Alter ist nur Build-Alter, keine veraltete Version.
|
||||
# Re-check: ob eine blackbox_exporter-Version > v0.28.0 erschienen ist.
|
||||
monitoring-blackbox-exporter 2026-09-10
|
||||
Reference in New Issue
Block a user