|
|
|
@@ -13,6 +13,7 @@ CERT_MAX_ROWS="${CERT_MAX_ROWS:-12}"
|
|
|
|
|
IMAGE_AGE_WARN_DAYS="${IMAGE_AGE_WARN_DAYS:-180}"
|
|
|
|
|
IMAGE_AGE_ALLOW_FILE="${IMAGE_AGE_ALLOW_FILE:-/mnt/user/services/homelab-infra/services/posture-check/image-age-allow.patterns}"
|
|
|
|
|
LOG_VOLUME_TOP_N="${LOG_VOLUME_TOP_N:-10}"
|
|
|
|
|
LOG_VOLUME_OBSERVE_THRESHOLD="${LOG_VOLUME_OBSERVE_THRESHOLD:-100000}"
|
|
|
|
|
DISK_USAGE_WARN_PCT="${DISK_USAGE_WARN_PCT:-85}"
|
|
|
|
|
CERT_WARN_DAYS="${CERT_WARN_DAYS:-21}"
|
|
|
|
|
BACKUP_DRIFT_FACTOR="${BACKUP_DRIFT_FACTOR:-2.0}"
|
|
|
|
@@ -217,6 +218,73 @@ derive_report_status() {
|
|
|
|
|
set_summary "report_status" "$REPORT_STATUS"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Print a Markdown bullet list naming every summary value that deviates from
# its expected state.
#
# Reads these shell variables as they are set at call time; each one falls
# back to the default used in its check when unset or empty:
#   borg_status, prometheus_alerts, prometheus_alerts_pending,
#   prometheus_alerts_firing, cert_warnings, disk_warnings, image_warnings,
#   image_warning_names, containers_exited_nonzero, containers_unhealthy,
#   host_recent_boot, backup_duration_drift, noise_threshold_exceeded
# Outputs: one "- ..." line per matching check on stdout, in the fixed order
#          below; a single fallback bullet when no check matched.
print_status_reasons() {
  local count=0

  # Emit one bullet and count it. `count` is the local of the enclosing
  # function; the helper reaches it through the shell's dynamic scoping.
  add_reason() {
    count=$((count + 1))
    printf '%s\n' "- $1"
  }

  if [ "${borg_status:-unknown}" != "completed" ]; then
    add_reason "Borg Backup ist \`${borg_status:-unknown}\` statt \`completed\`."
  fi

  case "${prometheus_alerts:-0}" in
    unknown) add_reason "Prometheus Alerts konnten nicht sicher gelesen werden." ;;
  esac

  # Plain counters: anything other than the literal string "0" is reported.
  case "${cert_warnings:-0}" in
    0) ;;
    *) add_reason "Zertifikatswarnungen: \`${cert_warnings:-0}\`." ;;
  esac

  case "${disk_warnings:-0}" in
    0) ;;
    *) add_reason "Storage-Warnungen: \`${disk_warnings:-0}\`." ;;
  esac

  case "${image_warnings:-0}" in
    0) ;;
    *)
      # Append the list of offending images only when it is available.
      if [ -z "${image_warning_names:-}" ]; then
        add_reason "Image-Warnungen: \`${image_warnings:-0}\`."
      else
        add_reason "Image-Warnungen: \`${image_warnings:-0}\` (${image_warning_names})."
      fi
      ;;
  esac

  case "${containers_exited_nonzero:-0}" in
    0) ;;
    *) add_reason "Container exited non-zero: \`${containers_exited_nonzero:-0}\`." ;;
  esac

  # Flags: reported only when set to exactly "1".
  case "${host_recent_boot:-0}" in
    1) add_reason "Host-Reboot innerhalb der letzten 24 Stunden." ;;
  esac

  case "${backup_duration_drift:-0}" in
    1) add_reason "Backup-Dauer-Drift erkannt." ;;
  esac

  case "${noise_threshold_exceeded:-0}" in
    0) ;;
    *) add_reason "Noise-Pattern ueber Eskalations-Schwelle: \`${noise_threshold_exceeded:-0}\`." ;;
  esac

  # Alert counters: "unknown" is not listed here; the unreadable case is
  # reported once through the prometheus_alerts check above.
  case "${prometheus_alerts_pending:-0}" in
    0 | unknown) ;;
    *) add_reason "Prometheus pending Alerts: \`${prometheus_alerts_pending:-0}\`." ;;
  esac

  case "${prometheus_alerts_firing:-0}" in
    0 | unknown) ;;
    *) add_reason "Prometheus firing Alerts: \`${prometheus_alerts_firing:-0}\`." ;;
  esac

  case "${containers_unhealthy:-0}" in
    0) ;;
    *) add_reason "Unhealthy Container: \`${containers_unhealthy:-0}\`." ;;
  esac

  # Nothing matched: say so explicitly instead of printing an empty list.
  [ "$count" -ne 0 ] || printf '%s\n' "- Keine direkten Ampel-Ausloeser im Summary-Set gefunden."
}
|
|
|
|
|
|
|
|
|
|
# Print a Markdown bullet list of additional observations taken from the
# summary values.
#
# Reads these shell variables as they are set at call time (unset or empty
# values fall back to the defaults used below):
#   traefik_5xx, traefik_5xx_top, log_highlights, log_volume_total,
#   docker_events, LOG_VOLUME_OBSERVE_THRESHOLD
# Outputs: one "- ..." line per observation on stdout; a single fallback
#          bullet when nothing was observed. A LOG_VOLUME_OBSERVE_THRESHOLD
#          that is not a plain non-negative integer produces a warning on
#          stderr and the log-volume observation is skipped.
print_notable_observations() {
  local count=0
  local volume="${log_volume_total:-0}"
  # Defaulted to empty so an unset threshold is handled below instead of
  # aborting the script when it runs with `set -u`.
  local threshold="${LOG_VOLUME_OBSERVE_THRESHOLD:-}"

  # Emit one bullet and count it. `count` is the local of the enclosing
  # function; the helper reaches it through the shell's dynamic scoping.
  add_observation() {
    printf '%s\n' "- $1"
    count=$((count + 1))
  }

  case "${traefik_5xx:-0}" in
    0 | unknown) ;;
    *)
      # Mention the top group only when one was recorded ("none" = no group).
      case "${traefik_5xx_top:-}" in
        '' | none)
          add_observation "Traefik 5xx: \`${traefik_5xx:-0}\`."
          ;;
        *)
          add_observation "Traefik 5xx: \`${traefik_5xx:-0}\` (Top-Gruppe: \`${traefik_5xx_top}\`)."
          ;;
      esac
      ;;
  esac

  case "${log_highlights:-0}" in
    0 | unknown) ;;
    *)
      add_observation "Log-Highlights: \`${log_highlights:-0}\` handlungsrelevante Treffer; Beispiele stehen in der Log-Auswertung."
      ;;
  esac

  # Compare numerically only when BOTH operands consist solely of digits.
  # The patterns test the whole string, so values such as "unknown" or a
  # multi-line value never reach `[ ... -ge ... ]`, which would otherwise
  # fail with "integer expression expected".
  case "$volume" in
    '' | *[!0-9]*) ;;
    *)
      case "$threshold" in
        '' | *[!0-9]*)
          printf '%s\n' "WARN: LOG_VOLUME_OBSERVE_THRESHOLD ist keine nicht-negative Ganzzahl: '${threshold}'; Log-Volumen-Beobachtung uebersprungen." >&2
          ;;
        *)
          if [ "$volume" -ge "$threshold" ]; then
            add_observation "Log-Volumen: \`${volume}\` Zeilen im Zeitraum; Top-Verursacher stehen im Log-Volumen-Abschnitt."
          fi
          ;;
      esac
      ;;
  esac

  case "${docker_events:-0}" in
    0 | unknown) ;;
    *) add_observation "Docker Critical Events: \`${docker_events:-0}\`." ;;
  esac

  # Nothing observed: say so explicitly instead of printing an empty list.
  if [ "$count" -eq 0 ]; then
    printf '%s\n' "- Keine zusaetzlichen auffaelligen Beobachtungen im Management-Summary."
  fi
}
|
|
|
|
|
|
|
|
|
|
collect_borg() {
|
|
|
|
|
append "## Borg Backup"
|
|
|
|
|
append ""
|
|
|
|
@@ -584,6 +652,7 @@ collect_image_freshness() {
|
|
|
|
|
local image_file="$TMP_DIR/images.tsv"
|
|
|
|
|
local image_warnings=0
|
|
|
|
|
local image_allowed=0
|
|
|
|
|
local image_warning_names=""
|
|
|
|
|
local now_epoch
|
|
|
|
|
: > "$image_file"
|
|
|
|
|
now_epoch="$(date +%s)"
|
|
|
|
@@ -630,6 +699,7 @@ collect_image_freshness() {
|
|
|
|
|
else
|
|
|
|
|
note="ueberaltert"
|
|
|
|
|
image_warnings=$((image_warnings + 1))
|
|
|
|
|
image_warning_names="${image_warning_names:+$image_warning_names,}$name:${age_days}d"
|
|
|
|
|
fi
|
|
|
|
|
fi
|
|
|
|
|
printf '%d\t%s\t%s\t%s\n' "$age_days" "$name" "$image_tag" "$note" >> "$image_file"
|
|
|
|
@@ -637,6 +707,7 @@ collect_image_freshness() {
|
|
|
|
|
|
|
|
|
|
set_summary "image_warnings" "$image_warnings"
|
|
|
|
|
set_summary "image_allowed" "$image_allowed"
|
|
|
|
|
set_summary "image_warning_names" "$image_warning_names"
|
|
|
|
|
|
|
|
|
|
if [ ! -s "$image_file" ]; then
|
|
|
|
|
append "- Keine Image-Daten verfuegbar."
|
|
|
|
@@ -781,8 +852,16 @@ collect_traefik_5xx() {
|
|
|
|
|
set_summary "traefik_5xx" "$count"
|
|
|
|
|
|
|
|
|
|
if [ "$count" -eq 0 ]; then
|
|
|
|
|
set_summary "traefik_5xx_top" "none"
|
|
|
|
|
append "- Keine 5xx-Antworten."
|
|
|
|
|
else
|
|
|
|
|
local top_group
|
|
|
|
|
top_group="$(awk '{ code=$9; service=$12; gsub(/"/, "", service); counts[service " " code]++ } END { for (k in counts) print counts[k], k }' "$file" \
|
|
|
|
|
| sort -nr \
|
|
|
|
|
| head -n 1 \
|
|
|
|
|
| awk '{ print $2 ":" $3 ":" $1 }' \
|
|
|
|
|
| sed -E 's#[^A-Za-z0-9_.:@/-]+#_#g')"
|
|
|
|
|
set_summary "traefik_5xx_top" "${top_group:-none}"
|
|
|
|
|
append "- 5xx-Antworten: $count"
|
|
|
|
|
append ""
|
|
|
|
|
append "### Gruppiert nach Service/Code"
|
|
|
|
@@ -1181,10 +1260,20 @@ write_report() {
|
|
|
|
|
if [ "$REPORT_STATUS" = "OK" ]; then
|
|
|
|
|
printf 'Im betrachteten Zeitraum zeigt das Homelab eine stabile Betriebslage. Das letzte Borg-Backup ist erfolgreich abgeschlossen, Prometheus meldet keine firing Alerts, keine unhealthy Container, Zertifikate und Storage im erwarteten Bereich.\n\n'
|
|
|
|
|
elif [ "$REPORT_STATUS" = "WARNUNG" ]; then
|
|
|
|
|
printf 'Im betrachteten Zeitraum gibt es Punkte, die Aufmerksamkeit verdienen. Der Betrieb ist nicht automatisch als kompromittiert zu bewerten, aber mindestens ein Signal (Backup, Pending Alert, Zertifikat, Storage, Image-Alter, Drift oder Reboot) weicht vom Normalzustand ab.\n\n'
|
|
|
|
|
printf 'Im betrachteten Zeitraum gibt es Punkte, die Aufmerksamkeit verdienen. Der Betrieb ist nicht automatisch als kompromittiert zu bewerten; die konkreten Ampel-Ausloeser stehen direkt darunter.\n\n'
|
|
|
|
|
else
|
|
|
|
|
printf 'Im betrachteten Zeitraum liegt ein kritisches Betriebssignal vor. Der Bericht sollte zeitnah gelesen und die betroffenen Komponenten priorisiert geprueft werden.\n\n'
|
|
|
|
|
fi
|
|
|
|
|
printf '### Warum dieser Status?\n\n'
|
|
|
|
|
if [ "$REPORT_STATUS" = "OK" ]; then
|
|
|
|
|
printf '%s\n\n' "- Keine Ampel-Ausloeser im Summary-Set."
|
|
|
|
|
else
|
|
|
|
|
print_status_reasons
|
|
|
|
|
printf '\n'
|
|
|
|
|
fi
|
|
|
|
|
printf '### Weitere auffaellige Beobachtungen\n\n'
|
|
|
|
|
print_notable_observations
|
|
|
|
|
printf '\n'
|
|
|
|
|
printf '### Management-Bewertung\n\n'
|
|
|
|
|
printf '%s\n' "- Status: \`$REPORT_STATUS\`"
|
|
|
|
|
printf '%s\n' "- Borg Backup: \`${borg_status:-unknown}\`"
|
|
|
|
|