#!/usr/bin/env bash set -euo pipefail SCRIPT_START="$(date +%s)" REPORT_DIR="${REPORT_DIR:-/mnt/user/services/posture-check/daily-reports}" REPORT_DATE="${REPORT_DATE:-$(date +%F)}" REPORT_PATH="${REPORT_PATH:-$REPORT_DIR/homelab-day-$REPORT_DATE.md}" PERSISTENT_SUMMARY_PATH="${PERSISTENT_SUMMARY_PATH:-$REPORT_DIR/summary-$REPORT_DATE.env}" SINCE="${SINCE:-24h}" MAX_LOG_LINES="${MAX_LOG_LINES:-80}" CERT_MAX_ROWS="${CERT_MAX_ROWS:-12}" IMAGE_AGE_WARN_DAYS="${IMAGE_AGE_WARN_DAYS:-180}" LOG_VOLUME_TOP_N="${LOG_VOLUME_TOP_N:-10}" DISK_USAGE_WARN_PCT="${DISK_USAGE_WARN_PCT:-85}" CERT_WARN_DAYS="${CERT_WARN_DAYS:-21}" BACKUP_DRIFT_FACTOR="${BACKUP_DRIFT_FACTOR:-2.0}" SHOW_KNOWN_NOISE="${SHOW_KNOWN_NOISE:-0}" SEND_MAIL="${SEND_MAIL:-0}" MAIL_MODE="${MAIL_MODE:-always}" MAIL_SCRIPT="${MAIL_SCRIPT:-/mnt/user/services/homelab-infra/services/posture-check/send-operations-report-mail.sh}" SEND_NTFY="${SEND_NTFY:-0}" NTFY_TOPIC="${NTFY_TOPIC:-homelab-info}" NTFY_SCRIPT="${NTFY_SCRIPT:-/mnt/user/services/homelab-infra/ops/restore-tests/send-ntfy.sh}" BORG_CONTAINER="${BORG_CONTAINER:-borg-ui}" PROMETHEUS_CONTAINER="${PROMETHEUS_CONTAINER:-monitoring-prometheus}" TRAEFIK_ACME_PATH="${TRAEFIK_ACME_PATH:-/mnt/user/appdata/traefik/letsencrypt/acme.json}" NOISE_PATTERNS_FILE="${NOISE_PATTERNS_FILE:-/mnt/user/services/homelab-infra/services/posture-check/log-noise.patterns}" NORMALIZE_NOISE_SCRIPT="${NORMALIZE_NOISE_SCRIPT:-/mnt/user/services/homelab-infra/services/posture-check/lib/normalize-noise-patterns.sh}" NOISE_ESCALATION_THRESHOLD="${NOISE_ESCALATION_THRESHOLD:-500}" NOISE_BREAKDOWN_TOP_N="${NOISE_BREAKDOWN_TOP_N:-10}" POSTURE_CHECK_FILE="${POSTURE_CHECK_FILE:-/mnt/user/services/posture-check/last.json}" LOCK_FILE="${LOCK_FILE:-/tmp/homelab-daily-report.lock}" REPORT_STATUS="UNKNOWN" exec 9>"$LOCK_FILE" if ! flock -n 9; then echo "Another daily-status-report run is already in progress (lock: $LOCK_FILE)" >&2 exit 3 fi TMP_DIR="$(mktemp -d /tmp/homelab-daily-report.XXXXXX)" BODY_PATH="$TMP_DIR/body.md" SUMMARY_PATH="$TMP_DIR/summary.env" SECTION_ERRORS_FILE="$TMP_DIR/section-errors.log" : > "$BODY_PATH" : > "$SUMMARY_PATH" : > "$SECTION_ERRORS_FILE" cleanup() { rm -rf "$TMP_DIR" } trap cleanup EXIT append() { printf '%s\n' "$*" >> "$BODY_PATH" } append_block() { cat >> "$BODY_PATH" } set_summary() { printf '%s=%s\n' "$1" "$2" >> "$SUMMARY_PATH" } record_section_error() { printf '%s: %s\n' "$1" "$2" >> "$SECTION_ERRORS_FILE" } have_container() { docker inspect "$1" >/dev/null 2>&1 } count_lines() { wc -l | awk '{ print $1 + 0 }' } shorten() { sed -E 's/[[:space:]]+/ /g' | cut -c 1-260 } format_duration() { local s="${1:-0}" if ! printf '%s' "$s" | grep -Eq '^[0-9]+$'; then printf '?\n' return fi local d=$(( s / 86400 )) local h=$(( (s % 86400) / 3600 )) local m=$(( (s % 3600) / 60 )) local sec=$(( s % 60 )) if [ "$d" -gt 0 ]; then printf '%d Tage %d Stunden\n' "$d" "$h" elif [ "$h" -gt 0 ]; then printf '%d Stunden %d Minuten\n' "$h" "$m" elif [ "$m" -gt 0 ]; then printf '%d Minuten %d Sekunden\n' "$m" "$sec" else printf '%d Sekunden\n' "$sec" fi } collect_overview() { local running total unhealthy exited_nonzero total="$(docker ps -a --format '{{.Names}}' | count_lines)" running="$(docker ps --format '{{.Names}}' | count_lines)" unhealthy="$(docker ps --filter health=unhealthy --format '{{.Names}}' | count_lines)" exited_nonzero="$(docker ps -a --filter status=exited --format '{{.Names}} {{.Status}}' | awk '!/Exited \(0\)/ { count++ } END { print count + 0 }')" set_summary "containers_total" "$total" set_summary "containers_running" "$running" set_summary "containers_unhealthy" "$unhealthy" set_summary "containers_exited_nonzero" "$exited_nonzero" append "## Betriebslage" append "" append "- Container: $running/$total laufen" append "- Unhealthy Container: $unhealthy" append "- Exited non-zero Container: $exited_nonzero" if [ -f "$POSTURE_CHECK_FILE" ]; then local posture_status posture_age now_epoch posture_status="$(sed -n 's/.*"status": *"\([^"]*\)".*/\1/p' "$POSTURE_CHECK_FILE" | head -n 1)" now_epoch="$(date +%s)" posture_age=$(( now_epoch - $(stat -c %Y "$POSTURE_CHECK_FILE" 2>/dev/null || echo "$now_epoch") )) append "- Letzter Posture-Check: ${posture_status:-unbekannt} (Datei ist $(format_duration "$posture_age") alt)" set_summary "posture_status" "${posture_status:-unknown}" set_summary "posture_age_seconds" "$posture_age" else append "- Letzter Posture-Check: keine Datei gefunden" set_summary "posture_status" "missing" record_section_error "overview" "Posture-Check-Datei $POSTURE_CHECK_FILE fehlt" fi append "" } collect_host_health() { append "## Host" append "" local boot_epoch boot_iso uptime_seconds load_1 load_5 load_15 now_epoch now_epoch="$(date +%s)" boot_epoch="$(awk '/^btime/ { print $2 }' /proc/stat 2>/dev/null || echo 0)" if [ "${boot_epoch:-0}" -gt 0 ]; then boot_iso="$(date -u -d "@$boot_epoch" -Iseconds 2>/dev/null || echo unknown)" uptime_seconds=$(( now_epoch - boot_epoch )) else boot_iso="unknown" uptime_seconds=0 record_section_error "host" "/proc/stat btime nicht lesbar" fi if [ "$uptime_seconds" -lt 0 ]; then uptime_seconds=0 fi if [ -r /proc/loadavg ]; then read -r load_1 load_5 load_15 _ < /proc/loadavg else load_1="?"; load_5="?"; load_15="?" fi append "- Hostname: \`$(hostname)\`" append "- Boot-Zeit: \`$boot_iso\`" append "- Uptime: $(format_duration "$uptime_seconds")" append "- Load average (1/5/15): $load_1 / $load_5 / $load_15" if [ "$uptime_seconds" -lt 86400 ]; then append "- WARNUNG: Boot innerhalb der letzten 24 Stunden erkannt." set_summary "host_recent_boot" "1" else append "- Reboot in den letzten 24h: nein" set_summary "host_recent_boot" "0" fi set_summary "host_uptime_seconds" "$uptime_seconds" set_summary "host_load_1" "$load_1" append "" } derive_report_status() { # shellcheck disable=SC1090 . "$SUMMARY_PATH" REPORT_STATUS="OK" local has_warn=0 has_crit=0 [ "${borg_status:-unknown}" != "completed" ] && has_warn=1 [ "${prometheus_alerts:-0}" = "unknown" ] && has_warn=1 [ "${cert_warnings:-0}" != "0" ] && has_warn=1 [ "${disk_warnings:-0}" != "0" ] && has_warn=1 [ "${image_warnings:-0}" != "0" ] && has_warn=1 [ "${containers_exited_nonzero:-0}" != "0" ] && has_warn=1 [ "${host_recent_boot:-0}" = "1" ] && has_warn=1 [ "${backup_duration_drift:-0}" = "1" ] && has_warn=1 [ "${noise_threshold_exceeded:-0}" != "0" ] && has_warn=1 if [ "${prometheus_alerts_pending:-0}" != "0" ] && [ "${prometheus_alerts_pending:-0}" != "unknown" ]; then has_warn=1 fi [ "${borg_status:-unknown}" = "failed" ] && has_crit=1 [ "${borg_status:-unknown}" = "error" ] && has_crit=1 [ "${containers_unhealthy:-0}" != "0" ] && has_crit=1 if [ "${prometheus_alerts_firing:-0}" != "0" ] && [ "${prometheus_alerts_firing:-0}" != "unknown" ]; then has_crit=1 fi if [ "$has_crit" -eq 1 ]; then REPORT_STATUS="KRITISCH" elif [ "$has_warn" -eq 1 ]; then REPORT_STATUS="WARNUNG" fi set_summary "report_status" "$REPORT_STATUS" } collect_borg() { append "## Borg Backup" append "" if ! have_container "$BORG_CONTAINER"; then append "- WARNUNG: Container \`$BORG_CONTAINER\` nicht gefunden." append "" set_summary "borg_status" "unknown" set_summary "backup_duration_drift" "unknown" record_section_error "borg" "Container $BORG_CONTAINER nicht gefunden" return fi if ! docker exec -i "$BORG_CONTAINER" python3 - <<'PY' >> "$BODY_PATH" import sqlite3 def fmt_bytes(value): if value is None: return "-" value = float(value) units = ["B", "KB", "MB", "GB", "TB"] for unit in units: if value < 1024 or unit == units[-1]: return f"{value:.1f} {unit}" if unit != "B" else f"{int(value)} B" value /= 1024 def fmt_sec(s): s = int(s) h, rem = divmod(s, 3600) m, sec = divmod(rem, 60) if h > 0: return f"{h}h {m}m" return f"{m}m {sec}s" conn = sqlite3.connect("/data/borg.db") conn.row_factory = sqlite3.Row cur = conn.cursor() print("### Letzte Backup-Jobs") rows = cur.execute(""" select id, status, started_at, completed_at, archive_name, nfiles, original_size, compressed_size, deduplicated_size, error_message from backup_jobs where started_at >= datetime('now', '-30 hours') or created_at >= datetime('now', '-30 hours') order by coalesce(started_at, created_at) desc limit 8 """).fetchall() if not rows: print("- WARNUNG: Kein Backup-Job in den letzten 30 Stunden gefunden.") else: print("| Zeit UTC | Status | Archiv | Dateien | Original | Dedupliziert |") print("|---|---:|---|---:|---:|---:|") for row in rows: archive = row["archive_name"] or "-" if len(archive) > 54: archive = archive[:51] + "..." print( f"| {row['started_at'] or row['completed_at'] or '-'} " f"| {row['status']} " f"| {archive} " f"| {row['nfiles'] if row['nfiles'] is not None else '-'} " f"| {fmt_bytes(row['original_size'])} " f"| {fmt_bytes(row['deduplicated_size'])} |" ) if row["error_message"]: print(f" - Fehler: {row['error_message'][:240]}") print("") print("### Zeitplan") for row in cur.execute(""" select name, enabled, last_run, next_run, cron_expression from scheduled_jobs order by id """): enabled = "aktiv" if row["enabled"] else "pausiert" print(f"- {row['name']}: {enabled}, last={row['last_run'] or '-'}, next={row['next_run'] or '-'}, cron=`{row['cron_expression']}`") print("") print("### Dauer-Drift (Median 14 Tage)") duration_rows = cur.execute(""" select started_at, completed_at, (julianday(completed_at) - julianday(started_at)) * 86400 as duration_seconds from backup_jobs where status = 'completed' and started_at is not null and completed_at is not null and completed_at >= datetime('now', '-14 days') order by completed_at desc """).fetchall() durations = [r["duration_seconds"] for r in duration_rows if r["duration_seconds"] and r["duration_seconds"] > 0] if len(durations) < 3: print(f"- Zu wenig Datenpunkte fuer eine Drift-Bewertung (n={len(durations)}).") else: durations_sorted = sorted(durations) median = durations_sorted[len(durations_sorted) // 2] latest = durations[0] ratio = latest / median if median > 0 else 0 print(f"- Letzter Lauf: {fmt_sec(latest)}") print(f"- Median 14 Tage: {fmt_sec(median)} (n={len(durations)})") print(f"- Verhaeltnis: {ratio:.2f}x") if ratio > 2.0: print(f"- Bewertung: Drift erkannt - letzter Lauf {ratio:.1f}x langsamer als der Median. Quellgroesse, IO und Repo-Zustand pruefen.") else: print("- Bewertung: Backup-Dauer im erwarteten Bereich.") PY then append "- WARNUNG: Borg-Auswertung fehlgeschlagen." set_summary "borg_status" "unknown" set_summary "backup_duration_drift" "unknown" record_section_error "borg" "Python-Auswertung in $BORG_CONTAINER fehlgeschlagen" else local borg_out borg_status borg_drift borg_out="$(docker exec -i "$BORG_CONTAINER" python3 - <<'PY' 2>/dev/null || true import sqlite3 conn = sqlite3.connect("/data/borg.db") conn.row_factory = sqlite3.Row cur = conn.cursor() status_row = cur.execute(""" select status from backup_jobs order by coalesce(started_at, created_at) desc limit 1 """).fetchone() status = status_row[0] if status_row else "missing" duration_rows = cur.execute(""" select (julianday(completed_at) - julianday(started_at)) * 86400 as ds from backup_jobs where status = 'completed' and started_at is not null and completed_at is not null and completed_at >= datetime('now', '-14 days') order by completed_at desc """).fetchall() durations = [r[0] for r in duration_rows if r[0] and r[0] > 0] if len(durations) < 3: drift = "insufficient" else: median = sorted(durations)[len(durations)//2] latest = durations[0] ratio = latest / median if median > 0 else 0 drift = "1" if ratio > 2.0 else "0" print(f"status={status}") print(f"drift={drift}") PY )" borg_status="$(printf '%s' "$borg_out" | sed -n 's/^status=//p' | head -n 1)" borg_drift="$(printf '%s' "$borg_out" | sed -n 's/^drift=//p' | head -n 1)" if [ "${borg_drift:-}" = "1" ]; then set_summary "backup_duration_drift" "1" elif [ "${borg_drift:-}" = "0" ]; then set_summary "backup_duration_drift" "0" else set_summary "backup_duration_drift" "unknown" fi set_summary "borg_status" "${borg_status:-unknown}" fi append "" } collect_prometheus() { append "## Prometheus Alerts" append "" if ! have_container "$PROMETHEUS_CONTAINER"; then append "- WARNUNG: Container \`$PROMETHEUS_CONTAINER\` nicht gefunden." append "" set_summary "prometheus_alerts" "unknown" set_summary "prometheus_alerts_firing" "unknown" set_summary "prometheus_alerts_pending" "unknown" record_section_error "prometheus" "Container $PROMETHEUS_CONTAINER nicht gefunden" return fi local alerts alerts="$(docker exec "$PROMETHEUS_CONTAINER" wget -qO- http://localhost:9090/api/v1/alerts 2>/dev/null || true)" if [ -z "$alerts" ]; then append "- WARNUNG: Prometheus Alerts API nicht erreichbar." set_summary "prometheus_alerts" "unknown" set_summary "prometheus_alerts_firing" "unknown" set_summary "prometheus_alerts_pending" "unknown" record_section_error "prometheus" "Alerts-API leer oder nicht erreichbar" elif printf '%s' "$alerts" | grep -q '"alerts":\[\]'; then append "- Keine aktiven Alerts." set_summary "prometheus_alerts" "0" set_summary "prometheus_alerts_firing" "0" set_summary "prometheus_alerts_pending" "0" else local total firing pending total="$(printf '%s' "$alerts" | grep -o '"alertname":"[^"]*"' | count_lines)" firing="$(printf '%s' "$alerts" | grep -o '"state":"firing"' | count_lines)" pending="$(printf '%s' "$alerts" | grep -o '"state":"pending"' | count_lines)" append "- Aktive Alerts insgesamt: $total" append "- Davon firing: $firing" append "- Davon pending: $pending" append "" append "### Details" printf '%s' "$alerts" \ | grep -o '"alertname":"[^"]*"\|"severity":"[^"]*"\|"instance":"[^"]*"\|"service":"[^"]*"\|"state":"[^"]*"' \ | sed 's/^/ - /' >> "$BODY_PATH" set_summary "prometheus_alerts" "$total" set_summary "prometheus_alerts_firing" "$firing" set_summary "prometheus_alerts_pending" "$pending" fi append "" } collect_certificate_health() { append "## Zertifikate" append "" local cert_file="$TMP_DIR/certificates.tsv" local cert_sorted="$TMP_DIR/certificates.sorted.tsv" local warning_count=0 local total_count=0 : > "$cert_file" if [ ! -f "$TRAEFIK_ACME_PATH" ]; then append "- WARNUNG: Traefik ACME-Datei nicht gefunden: $TRAEFIK_ACME_PATH" set_summary "cert_warnings" "1" record_section_error "certificates" "ACME-Datei $TRAEFIK_ACME_PATH fehlt" append "" return fi if docker run -i --rm \ -v "$TRAEFIK_ACME_PATH:/acme.json:ro" \ python:3.13-alpine python - <<'PY' > "$cert_file" import base64 import json import ssl import tempfile from datetime import datetime, timezone with open("/acme.json", "r", encoding="utf-8") as handle: data = json.load(handle) now = datetime.now(timezone.utc) for resolver in data.values(): for cert in resolver.get("Certificates", []): domain = cert.get("domain", {}).get("main") or "-" sans = cert.get("domain", {}).get("sans") or [] cert_b64 = cert.get("certificate") if not cert_b64: continue pem = base64.b64decode(cert_b64) with tempfile.NamedTemporaryFile(delete=False) as tmp: tmp.write(pem) tmp_path = tmp.name decoded = ssl._ssl._test_decode_cert(tmp_path) not_after = datetime.strptime(decoded["notAfter"], "%b %d %H:%M:%S %Y %Z").replace(tzinfo=timezone.utc) days = (not_after - now).days names = ", ".join([domain, *sans]) print(f"{days}\t{not_after.date().isoformat()}\t{names}") PY then if [ ! -s "$cert_file" ]; then append "- WARNUNG: Keine Zertifikate in ACME-Datei gefunden." warning_count=1 record_section_error "certificates" "ACME-Datei enthielt keine Zertifikate" else sort -n "$cert_file" > "$cert_sorted" total_count="$(count_lines < "$cert_sorted")" append "- Zertifikate gesamt: $total_count" append "- Anzeige: die $CERT_MAX_ROWS Zertifikate mit der kuerzesten Restlaufzeit" append "- Schwelle Warnung: weniger als $CERT_WARN_DAYS Tage" append "" append "| Resttage | Ablaufdatum UTC | Domains |" append "|---:|---|---|" while IFS="$(printf '\t')" read -r days expires domains; do append "| $days | $expires | $domains |" if [ "${days:-0}" -lt "$CERT_WARN_DAYS" ]; then warning_count=$((warning_count + 1)) fi done < <(head -n "$CERT_MAX_ROWS" "$cert_sorted") while IFS="$(printf '\t')" read -r days _expires _domains; do if [ "${days:-0}" -lt "$CERT_WARN_DAYS" ]; then warning_count=$((warning_count + 1)) fi done < <(tail -n +"$((CERT_MAX_ROWS + 1))" "$cert_sorted") append "" if [ "$warning_count" -eq 0 ]; then append "Bewertung: Keine Zertifikate im kritischen Erneuerungsfenster unter $CERT_WARN_DAYS Tagen." else append "Bewertung: $warning_count Zertifikat(e) laufen in weniger als $CERT_WARN_DAYS Tagen ab und sollten beobachtet werden." fi fi else append "- WARNUNG: Zertifikate konnten nicht aus ACME-Datei gelesen werden." warning_count=1 record_section_error "certificates" "Auswertung der ACME-Datei fehlgeschlagen" fi set_summary "cert_warnings" "$warning_count" append "" } collect_disk_health() { append "## Storage / Filesystem" append "" local disk_warnings=0 local paths="/mnt/cache /mnt/disk1 /mnt/user /mnt/user/appdata /mnt/user/backups" append "- Schwelle Warnung: Nutzung ab ${DISK_USAGE_WARN_PCT}%" append "" append "| Pfad | Filesystem | Nutzung | Frei | Bewertung |" append "|---|---|---:|---:|---|" for path in $paths; do if [ ! -e "$path" ]; then append "| $path | - | - | - | fehlt |" disk_warnings=$((disk_warnings + 1)) record_section_error "disk" "Kernpfad $path fehlt" continue fi local fstype usage avail verdict fstype="$(findmnt -T "$path" -no FSTYPE 2>/dev/null | head -n 1 || true)" usage="$(df -P "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')" avail="$(df -hP "$path" 2>/dev/null | awk 'NR==2 { print $4 }')" verdict="ok" if ! printf '%s' "${usage:-}" | grep -Eq '^[0-9]+$'; then usage="-" verdict="unbekannt" disk_warnings=$((disk_warnings + 1)) elif [ "$usage" -ge "$DISK_USAGE_WARN_PCT" ]; then verdict="Warnung: >=${DISK_USAGE_WARN_PCT}%" disk_warnings=$((disk_warnings + 1)) fi append "| $path | ${fstype:-unbekannt} | ${usage}% | ${avail:-?} | $verdict |" done append "" if [ "$disk_warnings" -eq 0 ]; then append "Bewertung: Keine kritischen Fuellstaende oder fehlenden Kernpfade erkannt." else append "Bewertung: $disk_warnings Storage-/Filesystem-Punkt(e) brauchen Aufmerksamkeit." fi set_summary "disk_warnings" "$disk_warnings" append "" } collect_image_freshness() { append "## Image-Aktualitaet" append "" local image_file="$TMP_DIR/images.tsv" local image_warnings=0 local now_epoch : > "$image_file" now_epoch="$(date +%s)" while IFS= read -r name; do [ -n "$name" ] || continue local image_id created_iso created_epoch age_days image_tag image_id="$(docker inspect --format '{{.Image}}' "$name" 2>/dev/null || true)" [ -n "$image_id" ] || continue created_iso="$(docker image inspect --format '{{.Created}}' "$image_id" 2>/dev/null || true)" image_tag="$(docker inspect --format '{{.Config.Image}}' "$name" 2>/dev/null || echo '?')" [ -n "$created_iso" ] || continue created_epoch="$(date -d "$created_iso" +%s 2>/dev/null || echo 0)" [ "$created_epoch" -gt 0 ] || continue age_days=$(( (now_epoch - created_epoch) / 86400 )) printf '%d\t%s\t%s\n' "$age_days" "$name" "$image_tag" >> "$image_file" if [ "$age_days" -ge "$IMAGE_AGE_WARN_DAYS" ]; then image_warnings=$((image_warnings + 1)) fi done < <(docker ps --format '{{.Names}}') set_summary "image_warnings" "$image_warnings" if [ ! -s "$image_file" ]; then append "- Keine Image-Daten verfuegbar." record_section_error "images" "Keine Image-Daten ermittelt" else append "- Schwelle Warnung: Image aelter als $IMAGE_AGE_WARN_DAYS Tage" append "- Container mit Image >= $IMAGE_AGE_WARN_DAYS Tage: $image_warnings" append "" append "### Aelteste Images (Top 10)" append "" append "| Alter Tage | Container | Image |" append "|---:|---|---|" sort -nr "$image_file" | head -n 10 | while IFS="$(printf '\t')" read -r age name img; do append "| $age | $name | $img |" done append "" if [ "$image_warnings" -eq 0 ]; then append "Bewertung: Keine Container mit ueberalterten Images. CVE-Hygiene aus dieser Sicht ok." else append "Bewertung: $image_warnings Container nutzen Images aelter als $IMAGE_AGE_WARN_DAYS Tage. Update-Pipeline und CVE-Status pruefen." fi fi append "" } collect_container_events() { append "## Docker Events ($SINCE)" append "" local events_file="$TMP_DIR/docker-events.log" timeout 20 docker events \ --since "$SINCE" \ --until "$(date -Iseconds)" \ --filter event=die \ --filter event=oom \ --filter event=kill \ --filter event=restart \ --format '{{.Time}}|{{.Actor.Attributes.name}}|{{.Action}}|{{.Actor.Attributes.exitCode}}|{{.Actor.Attributes.image}}' \ | awk -F '|' '!(($3 == "die") && ($4 == "0")) { print }' \ > "$events_file" 2>/dev/null || true local event_count event_count="$(count_lines < "$events_file")" set_summary "docker_events" "$event_count" if [ "$event_count" -eq 0 ]; then append '- Keine `die`/`oom`/`kill`/`restart` Events im Zeitraum.' else append "- Relevante Events: $event_count" append "" append '```text' tail -n 80 "$events_file" >> "$BODY_PATH" append '```' fi append "" } collect_container_state() { append "## Container-Zustand" append "" append "### Nicht laufende Container" local stopped_file="$TMP_DIR/stopped.log" docker ps -a --filter status=exited --filter status=dead --filter status=created --format '{{.Names}}\t{{.Status}}' > "$stopped_file" if [ ! -s "$stopped_file" ]; then append "- Keine." else append '```text' cat "$stopped_file" >> "$BODY_PATH" append '```' fi append "" append "### Container mit RestartCount > 0" local restart_file="$TMP_DIR/restarts.log" : > "$restart_file" while IFS= read -r name; do [ -n "$name" ] || continue local count count="$(docker inspect "$name" --format '{{.RestartCount}}' 2>/dev/null || echo 0)" if [ "${count:-0}" -gt 0 ]; then printf '%s\t%s\n' "$name" "$count" >> "$restart_file" fi done < <(docker ps -a --format '{{.Names}}') if [ ! -s "$restart_file" ]; then append "- Keine." else append '```text' sort -k2,2nr "$restart_file" >> "$BODY_PATH" append '```' fi append "" } collect_traefik_5xx() { append "## Traefik 5xx ($SINCE)" append "" if ! have_container traefik; then append "- Traefik-Container nicht gefunden." append "" record_section_error "traefik" "Container traefik nicht gefunden" return fi local file="$TMP_DIR/traefik-5xx.log" docker logs --since "$SINCE" traefik 2>&1 \ | awk '$9 ~ /^5[0-9][0-9]$/ { print }' \ > "$file" || true local count count="$(count_lines < "$file")" set_summary "traefik_5xx" "$count" if [ "$count" -eq 0 ]; then append "- Keine 5xx-Antworten." else append "- 5xx-Antworten: $count" append "" append "### Gruppiert nach Service/Code" append '```text' awk '{ code=$9; service=$12; gsub(/"/, "", service); counts[service " " code]++ } END { for (k in counts) print counts[k], k }' "$file" | sort -nr >> "$BODY_PATH" append '```' append "" append "### Letzte Zeilen" append '```text' tail -n "$MAX_LOG_LINES" "$file" >> "$BODY_PATH" append '```' fi append "" } collect_log_highlights() { append "## Log-Auswertung ($SINCE)" append "" append "Ziel dieses Abschnitts ist nicht, Rohlogs zu wiederholen, sondern handlungsrelevante Auffaelligkeiten auszusortieren." append "" local hits="$TMP_DIR/log-hits.log" local attention="$TMP_DIR/log-attention.log" local known_noise="$TMP_DIR/log-known-noise.log" : > "$hits" : > "$attention" : > "$known_noise" while IFS= read -r name; do [ -n "$name" ] || continue docker logs --since "$SINCE" "$name" 2>&1 \ | grep -Eai 'error|fatal|panic|exception|failed|denied|unauthorized|forbidden|oom' \ | grep -Eavi 'level=info|levelname.: .INFO| 200 OK| 404 Not Found|healthcheck|probe_success' \ | grep -Eavi 'production.DEBUG|stats_refresh_scheduler.*errors.: 0|Sync completed.*Failed: 0' \ | sed -E 's/(refresh_token: )[A-Za-z0-9._-]+/\1[REDACTED]/Ig; s/(token: )[A-Za-z0-9._-]+/\1[REDACTED]/Ig; s/(Authorization: )[A-Za-z0-9._ -]+/\1[REDACTED]/Ig' \ | sed "s/^/[$name] /" >> "$hits" || true done < <(docker ps --format '{{.Names}}') # Normalize the noise pattern file (drop comments, empty lines, trim # whitespace). An empty or whitespace-only pattern line would otherwise # make grep -Eaif match every hit and silently wipe the log highlights. local noise_normalized="$TMP_DIR/noise.patterns.normalized" : > "$noise_normalized" if [ -f "$NOISE_PATTERNS_FILE" ]; then if [ -x "$NORMALIZE_NOISE_SCRIPT" ]; then "$NORMALIZE_NOISE_SCRIPT" "$NOISE_PATTERNS_FILE" > "$noise_normalized" 2>/dev/null || : > "$noise_normalized" else record_section_error "log-highlights" "Normalize-Helper fehlt oder nicht ausfuehrbar: $NORMALIZE_NOISE_SCRIPT - Noise-Patterns ungenormt verwendet" # Fallback inline (same logic as the helper) so we still avoid the # "empty line matches all" trap. grep -Ev '^[[:space:]]*(#|$)' "$NOISE_PATTERNS_FILE" 2>/dev/null \ | sed -E 's/^[[:space:]]+//; s/[[:space:]]+$//' \ | grep -v '^$' > "$noise_normalized" || : > "$noise_normalized" fi else record_section_error "log-highlights" "Noise-Pattern-Datei $NOISE_PATTERNS_FILE fehlt - alle Treffer gelten als handlungsrelevant" fi if [ -s "$hits" ]; then if [ -s "$noise_normalized" ]; then grep -Eaif "$noise_normalized" "$hits" > "$known_noise" || true fi if [ -s "$known_noise" ]; then # Normalisierung gegen abweichende Whitespace-Enden sed -E 's/[[:space:]]+$//' "$known_noise" > "$known_noise.norm" sed -E 's/[[:space:]]+$//' "$hits" > "$hits.norm" grep -Fvxf "$known_noise.norm" "$hits.norm" > "$attention" || true else cp "$hits" "$attention" fi fi # Per-container noise breakdown (always computed, even if SHOW_KNOWN_NOISE=0). local noise_by_container="$TMP_DIR/noise-by-container.tsv" : > "$noise_by_container" if [ -s "$known_noise" ]; then awk -F '[][]' '{ counts[$2]++ } END { for (n in counts) print counts[n] "\t" n }' "$known_noise" \ | sort -nr > "$noise_by_container" fi # Per-pattern noise breakdown: count how often each pattern hit in $hits. # Note: a single hit line may match multiple patterns; counts can overlap. local noise_by_pattern="$TMP_DIR/noise-by-pattern.tsv" : > "$noise_by_pattern" if [ -s "$noise_normalized" ] && [ -s "$hits" ]; then while IFS= read -r p; do [ -n "$p" ] || continue local pcount pcount="$(grep -Eaic -- "$p" "$hits" 2>/dev/null || echo 0)" if [ "${pcount:-0}" -gt 0 ]; then printf '%d\t%s\n' "$pcount" "$p" >> "$noise_by_pattern" fi done < "$noise_normalized" if [ -s "$noise_by_pattern" ]; then sort -nr -o "$noise_by_pattern" "$noise_by_pattern" fi fi # Threshold escalation: how many patterns produced more than the threshold? local noise_threshold_exceeded=0 if [ -s "$noise_by_pattern" ]; then noise_threshold_exceeded="$(awk -v t="$NOISE_ESCALATION_THRESHOLD" '$1 > t { n++ } END { print n + 0 }' "$noise_by_pattern")" fi set_summary "noise_threshold_exceeded" "$noise_threshold_exceeded" local hit_count attention_count known_noise_count hit_count="$(count_lines < "$hits")" attention_count="$(count_lines < "$attention")" known_noise_count="$(count_lines < "$known_noise")" set_summary "log_highlights" "$attention_count" set_summary "log_hits_total" "$hit_count" set_summary "log_known_noise" "$known_noise_count" if [ "$hit_count" -eq 0 ]; then append "- Keine auffaelligen Logmuster gefunden." else append "- Gefundene Logmuster insgesamt: $hit_count" append "- Davon als bekanntes Rauschen eingeordnet: $known_noise_count" append "- Handlungsrelevante Logmuster: $attention_count" append "- Noise-Pattern-Quelle: \`$NOISE_PATTERNS_FILE\`" append "- Eskalations-Schwelle pro Pattern: $NOISE_ESCALATION_THRESHOLD" if [ "$noise_threshold_exceeded" -gt 0 ]; then append "- WARNUNG: $noise_threshold_exceeded Pattern ueberschreit(en) die Schwelle - bitte pruefen ob noch wirklich Noise." fi append "" if [ "$attention_count" -eq 0 ]; then append "Bewertung: Keine handlungsrelevanten Logmuster. Die Treffer bestehen aus bekannten, aktuell nicht kritischen Meldungen." else append "Bewertung: Es gibt Logmuster, die nicht automatisch als bekanntes Rauschen eingeordnet wurden. Diese sollten geprueft werden." append "" append "### Betroffene Container" append "" append "| Container | Anzahl |" append "|---|---:|" awk -F '[][]' '{ counts[$2]++ } END { for (name in counts) print "| " name " | " counts[name] " |" }' "$attention" | sort >> "$BODY_PATH" append "" append "### Beispiele" append "" append '```text' awk -F '[][]' ' { name=$2 if (seen[name] < 3) { line=$0 gsub(/[[:space:]]+/, " ", line) if (length(line) > 220) line=substr(line, 1, 217) "..." print line seen[name]++ } } ' "$attention" | head -n "$MAX_LOG_LINES" >> "$BODY_PATH" append '```' fi if [ "$known_noise_count" -gt 0 ]; then append "" append "### Bekanntes Rauschen (Top)" append "" if [ -s "$noise_by_container" ]; then append "#### Container mit den meisten Noise-Treffern" append "" append "| Container | Anzahl |" append "|---|---:|" head -n "$NOISE_BREAKDOWN_TOP_N" "$noise_by_container" \ | while IFS="$(printf '\t')" read -r cnt cname; do append "| ${cname:-?} | $cnt |" done append "" fi if [ -s "$noise_by_pattern" ]; then append "#### Pattern mit den meisten Treffern" append "" append "| Pattern | Anzahl |" append "|---|---:|" head -n "$NOISE_BREAKDOWN_TOP_N" "$noise_by_pattern" \ | while IFS="$(printf '\t')" read -r cnt pat; do local short="$pat" if [ "${#short}" -gt 80 ]; then short="${short:0:77}..." fi # Escape pipe characters that would break the markdown table. short="${short//|/\\|}" append "| \`$short\` | $cnt |" done append "" fi if [ "$noise_threshold_exceeded" -gt 0 ]; then append "Bewertung: $noise_threshold_exceeded Pattern ueberschreit(en) die Eskalations-Schwelle ($NOISE_ESCALATION_THRESHOLD). Bitte pruefen, ob die als Noise eingeordneten Meldungen noch fachlich Noise sind oder ob sich ein echter Vorfall darunter versteckt." else append "Bewertung: Kein Pattern ueberschreitet die Eskalations-Schwelle ($NOISE_ESCALATION_THRESHOLD)." fi fi if [ "$known_noise_count" -gt 0 ] && [ "$SHOW_KNOWN_NOISE" = "1" ]; then append "" append "### Ausgeblendetes bekanntes Rauschen (Top 50 Zeilen)" append "" append '```text' head -n 50 "$known_noise" >> "$BODY_PATH" append '```' fi fi append "" } collect_log_volume() { append "## Log-Volumen ($SINCE)" append "" local volume_file="$TMP_DIR/log-volume.tsv" : > "$volume_file" while IFS= read -r name; do [ -n "$name" ] || continue local count count="$(docker logs --since "$SINCE" "$name" 2>&1 | count_lines)" printf '%d\t%s\n' "$count" "$name" >> "$volume_file" done < <(docker ps --format '{{.Names}}') local total total="$(awk '{ s += $1 } END { print s + 0 }' "$volume_file")" set_summary "log_volume_total" "$total" if [ "$total" -eq 0 ]; then append "- Keine Logzeilen im Zeitraum (unwahrscheinlich, evtl. Datenquelle pruefen)." record_section_error "log-volume" "Log-Volumen ueber alle Container ist 0" else append "- Zeilen insgesamt im Zeitraum: $total" append "" append "### Top $LOG_VOLUME_TOP_N lauteste Container" append "" append "| Container | Zeilen |" append "|---|---:|" sort -nr "$volume_file" | head -n "$LOG_VOLUME_TOP_N" | while IFS="$(printf '\t')" read -r c n; do append "| $n | $c |" done append "" append "Bewertung: Auffaellig laute Container sind oft ein Frueh-Indikator fuer Endlosschleifen, schlecht konfigurierte Loglevel oder Probe-Spam." fi append "" } collect_diff_yesterday() { append "## Vergleich mit gestern" append "" local yesterday yesterday_summary yesterday="$(date -d 'yesterday' +%F 2>/dev/null || true)" yesterday_summary="$REPORT_DIR/summary-$yesterday.env" if [ -z "$yesterday" ] || [ ! -f "$yesterday_summary" ]; then append "- Keine Vortagsdaten verfuegbar ($yesterday_summary)." append "" return fi local prev_borg= prev_alerts= prev_firing= prev_pending= prev_unhealthy= prev_exited= prev_5xx= prev_events= prev_log= prev_certs= prev_disk= prev_img= prev_drift= prev_vol= while IFS='=' read -r key value; do case "$key" in borg_status) prev_borg="$value" ;; prometheus_alerts) prev_alerts="$value" ;; prometheus_alerts_firing) prev_firing="$value" ;; prometheus_alerts_pending) prev_pending="$value" ;; containers_unhealthy) prev_unhealthy="$value" ;; containers_exited_nonzero) prev_exited="$value" ;; traefik_5xx) prev_5xx="$value" ;; docker_events) prev_events="$value" ;; log_highlights) prev_log="$value" ;; cert_warnings) prev_certs="$value" ;; disk_warnings) prev_disk="$value" ;; image_warnings) prev_img="$value" ;; backup_duration_drift) prev_drift="$value" ;; log_volume_total) prev_vol="$value" ;; esac done < "$yesterday_summary" # shellcheck disable=SC1090 . "$SUMMARY_PATH" append "Vergleich des Datums $REPORT_DATE mit $yesterday." append "" append "| Metrik | Heute | Gestern |" append "|---|---:|---:|" append "| Borg Status | ${borg_status:-?} | ${prev_borg:-?} |" append "| Prometheus Alerts gesamt | ${prometheus_alerts:-?} | ${prev_alerts:-?} |" append "| Prometheus firing | ${prometheus_alerts_firing:-?} | ${prev_firing:-?} |" append "| Prometheus pending | ${prometheus_alerts_pending:-?} | ${prev_pending:-?} |" append "| Container unhealthy | ${containers_unhealthy:-?} | ${prev_unhealthy:-?} |" append "| Container exited non-zero | ${containers_exited_nonzero:-?} | ${prev_exited:-?} |" append "| Docker Events | ${docker_events:-?} | ${prev_events:-?} |" append "| Traefik 5xx | ${traefik_5xx:-?} | ${prev_5xx:-?} |" append "| Log-Highlights | ${log_highlights:-?} | ${prev_log:-?} |" append "| Log-Volumen | ${log_volume_total:-?} | ${prev_vol:-?} |" append "| Zertifikatswarnungen | ${cert_warnings:-?} | ${prev_certs:-?} |" append "| Storage-Warnungen | ${disk_warnings:-?} | ${prev_disk:-?} |" append "| Image-Warnungen | ${image_warnings:-?} | ${prev_img:-?} |" append "| Backup-Dauer-Drift | ${backup_duration_drift:-?} | ${prev_drift:-?} |" append "" local notable=0 if [ "${containers_exited_nonzero:-0}" != "${prev_exited:-0}" ] || \ [ "${containers_unhealthy:-0}" != "${prev_unhealthy:-0}" ] || \ [ "${prometheus_alerts_firing:-0}" != "${prev_firing:-0}" ] || \ [ "${prometheus_alerts_pending:-0}" != "${prev_pending:-0}" ] || \ [ "${log_highlights:-0}" != "${prev_log:-0}" ] || \ [ "${borg_status:-unknown}" != "${prev_borg:-unknown}" ] || \ [ "${backup_duration_drift:-0}" != "${prev_drift:-0}" ]; then notable=1 fi if [ "$notable" -eq 0 ]; then append "Bewertung: Keine relevanten Aenderungen gegenueber gestern." else append "Bewertung: Relevante Aenderungen gegenueber gestern. Details bitte in den einzelnen Abschnitten pruefen." fi append "" } collect_self_health() { append "## Self-Health" append "" local script_duration section_failures script_duration=$(( $(date +%s) - SCRIPT_START )) section_failures="$(count_lines < "$SECTION_ERRORS_FILE")" set_summary "script_duration_seconds" "$script_duration" set_summary "section_failures" "$section_failures" append "- Skript-Laufzeit: $(format_duration "$script_duration") (${script_duration}s)" append "- Sektionen mit Fehlern: $section_failures" append "- Noise-Pattern-Datei vorhanden: $([ -f "$NOISE_PATTERNS_FILE" ] && echo ja || echo nein)" append "- Lock-Datei: \`$LOCK_FILE\`" if [ "$section_failures" -gt 0 ]; then append "" append "### Fehlerhafte Sektionen" append "" while IFS= read -r line; do append "- $line" done < "$SECTION_ERRORS_FILE" fi append "" } write_report() { mkdir -p "$REPORT_DIR" # shellcheck disable=SC1090 . "$SUMMARY_PATH" { printf '# Homelab Operations Report - %s\n\n' "$REPORT_DATE" printf '%s\n' "- Erstellt: \`$(date -Iseconds)\`" printf '%s\n' "- Zeitraum: letzte \`$SINCE\`" printf '%s\n' "- Host: \`$(hostname)\`" printf '%s\n\n' "- Gesamtbewertung: \`$REPORT_STATUS\`" printf '## Executive Summary\n\n' if [ "$REPORT_STATUS" = "OK" ]; then printf 'Im betrachteten Zeitraum zeigt das Homelab eine stabile Betriebslage. Das letzte Borg-Backup ist erfolgreich abgeschlossen, Prometheus meldet keine firing Alerts, keine unhealthy Container, Zertifikate und Storage im erwarteten Bereich.\n\n' elif [ "$REPORT_STATUS" = "WARNUNG" ]; then printf 'Im betrachteten Zeitraum gibt es Punkte, die Aufmerksamkeit verdienen. Der Betrieb ist nicht automatisch als kompromittiert zu bewerten, aber mindestens ein Signal (Backup, Pending Alert, Zertifikat, Storage, Image-Alter, Drift oder Reboot) weicht vom Normalzustand ab.\n\n' else printf 'Im betrachteten Zeitraum liegt ein kritisches Betriebssignal vor. Der Bericht sollte zeitnah gelesen und die betroffenen Komponenten priorisiert geprueft werden.\n\n' fi printf '### Management-Bewertung\n\n' printf '%s\n' "- Status: \`$REPORT_STATUS\`" printf '%s\n' "- Borg Backup: \`${borg_status:-unknown}\`" printf '%s\n' "- Backup-Dauer-Drift: \`${backup_duration_drift:-unknown}\`" printf '%s\n' "- Prometheus Alerts (gesamt/firing/pending): \`${prometheus_alerts:-unknown}\` / \`${prometheus_alerts_firing:-unknown}\` / \`${prometheus_alerts_pending:-unknown}\`" printf '%s\n' "- Container unhealthy: \`${containers_unhealthy:-unknown}\`" printf '%s\n' "- Container exited non-zero: \`${containers_exited_nonzero:-unknown}\`" printf '%s\n' "- Docker Critical Events: \`${docker_events:-unknown}\`" printf '%s\n' "- Traefik 5xx: \`${traefik_5xx:-unknown}\`" printf '%s\n' "- Zertifikatswarnungen: \`${cert_warnings:-unknown}\`" printf '%s\n' "- Storage-Warnungen: \`${disk_warnings:-unknown}\`" printf '%s\n' "- Image-Warnungen: \`${image_warnings:-unknown}\`" printf '%s\n' "- Log-Highlights: \`${log_highlights:-unknown}\`" printf '%s\n' "- Noise-Pattern ueber Schwelle: \`${noise_threshold_exceeded:-0}\`" printf '%s\n' "- Log-Volumen gesamt: \`${log_volume_total:-unknown}\`" printf '%s\n' "- Reboot in letzten 24h: \`${host_recent_boot:-unknown}\`" printf '%s\n\n' "- Sektionsfehler im Skript: \`${section_failures:-unknown}\`" printf '### Einordnung\n\n' printf 'Dieser Report ist ein Management-Lagebericht: Er verdichtet Backup-Status, Container-Zustand, Monitoring-Alerts, Traefik-Fehler, Zertifikate, Storage, Image-Aktualitaet, Log-Volumen und Drift-Indikatoren. Rohlogs werden nur ausschnittsweise gezeigt, damit der Bericht lesbar bleibt und trotzdem nachvollziehbar ist.\n\n' cat "$BODY_PATH" printf '## Schlussbewertung\n\n' if [ "$REPORT_STATUS" = "OK" ]; then printf 'Das Homelab war im betrachteten Zeitraum betriebsfaehig und ohne akute Warnsignale. Es besteht aus diesem Report heraus kein unmittelbarer Handlungsdruck.\n' elif [ "$REPORT_STATUS" = "WARNUNG" ]; then printf 'Das Homelab war grundsaetzlich betriebsfaehig, zeigt aber mindestens eine Auffaelligkeit. Die im Bericht genannten Punkte sollten geprueft und bei Wiederholung nachverfolgt werden.\n' else printf 'Das Homelab zeigt ein kritisches Signal. Die betroffenen Dienste, Backup-Lage und firing Alerts sollten sofort geprueft werden.\n' fi } > "$REPORT_PATH.tmp" mv "$REPORT_PATH.tmp" "$REPORT_PATH" cp "$SUMMARY_PATH" "$PERSISTENT_SUMMARY_PATH.tmp" mv "$PERSISTENT_SUMMARY_PATH.tmp" "$PERSISTENT_SUMMARY_PATH" } send_report_mail() { [ "$SEND_MAIL" = "1" ] || return 0 [ -x "$MAIL_SCRIPT" ] || { echo "Mail script missing or not executable: $MAIL_SCRIPT" >&2 record_section_error "mail" "Mail-Skript $MAIL_SCRIPT fehlt oder nicht ausfuehrbar" return 1 } case "$MAIL_MODE:$REPORT_STATUS" in always:*|warning:WARNUNG|warning:KRITISCH|critical:KRITISCH) "$MAIL_SCRIPT" "$REPORT_PATH" "$REPORT_STATUS" ;; always:*|warning:*|critical:*) # Mode bekannt, aber Status loest keinen Versand aus ;; *) echo "Unknown MAIL_MODE '$MAIL_MODE' - mail not sent. Use always|warning|critical." >&2 record_section_error "mail" "Unbekanntes MAIL_MODE '$MAIL_MODE'" return 1 ;; esac } send_summary_ntfy() { [ "$SEND_NTFY" = "1" ] || return 0 [ -x "$NTFY_SCRIPT" ] || return 0 # shellcheck disable=SC1090 . "$SUMMARY_PATH" local title="Homelab Tagesprotokoll: ${REPORT_STATUS:-unknown} / borg=${borg_status:-unknown}" local priority="default" local body="Report: $REPORT_PATH Status: $REPORT_STATUS Container: ${containers_running:-?}/${containers_total:-?} running, unhealthy=${containers_unhealthy:-?}, exited_nonzero=${containers_exited_nonzero:-?} Borg: ${borg_status:-unknown} (drift=${backup_duration_drift:-unknown}) Prometheus alerts (total/firing/pending): ${prometheus_alerts:-unknown}/${prometheus_alerts_firing:-unknown}/${prometheus_alerts_pending:-unknown} Docker events: ${docker_events:-unknown} Traefik 5xx: ${traefik_5xx:-unknown} Certs warn: ${cert_warnings:-unknown} Disk warn: ${disk_warnings:-unknown} Image warn: ${image_warnings:-unknown} Log highlights: ${log_highlights:-unknown} Log volume: ${log_volume_total:-unknown} Recent boot: ${host_recent_boot:-unknown} Section errors: ${section_failures:-unknown}" case "$REPORT_STATUS" in KRITISCH) priority="urgent" ;; WARNUNG) priority="high" ;; *) priority="default" ;; esac "$NTFY_SCRIPT" "$NTFY_TOPIC" "$title" "$body" "$priority" || true } main() { collect_overview collect_host_health collect_borg collect_prometheus collect_certificate_health collect_disk_health collect_image_freshness collect_container_events collect_container_state collect_traefik_5xx collect_log_highlights collect_log_volume collect_diff_yesterday derive_report_status collect_self_health write_report send_report_mail send_summary_ntfy printf '%s\n' "$REPORT_PATH" } main "$@" case "$REPORT_STATUS" in KRITISCH) exit 2 ;; WARNUNG) exit 1 ;; *) exit 0 ;; esac