cd650b19ac
Operational hardening across several services after live incident analysis between 2026-05-18 and 2026-05-20: - Gitea: disable public registration and OpenID signup/signin to stop the external POST / 5xx bursts that triggered availability alerts. New repo-wide policy requires every productive Micha/homelab-infra Komodo stack to ship with an active Gitea->Komodo webhook on the current stack ID (documented in CLAUDE.md, AI_CONTEXT.md, WORKFLOW.md). - posture-check: extract the Disk1 fstype check into its own function so the documented Disk1 NTFS exception no longer raises ntfy warnings, skip POSIX inode checks on NTFS, and dedup ntfy alerts via a fingerprint state file with ALERT_REPEAT_SECONDS (default 24h). Repeat-spam on the same cause now suppressed. - docker-critical-events: parse the event JSON for container name, action, exit code and signal; drop `die exit=0` events (clean stops); ship a structured ntfy message instead of the raw event line. - Borg UI: mount /mnt/user/services into the backup container as /local/services:ro and include homelab-infra, stacks and posture-check in all-important-sources.txt. RESTORE_MATRIX and DISASTER_RECOVERY updated accordingly. - Unraid user scripts: document the new homelab-operations-report-daily cron job and the SMTP password file it expects on the host. - MIGRATION_LOG: capture the four live events from this window - Gitea 5xx burst + signup closure, Komodo webhook reconciliation, posture-check host-version verification, Borg scope extension, and Traefik 5xx alert detuning. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
99 lines
2.5 KiB
Bash
Executable File
99 lines
2.5 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Configuration for the Docker critical-event watcher.
#
# Every setting can be overridden from the environment; the value below is
# used when the variable is unset or empty:
#   NTFY_SCRIPT   - path of the ntfy helper script
#   NTFY_TOPIC    - ntfy topic handed to the helper
#   SEND_NTFY     - notifications are attempted only when this equals "1"
#   OUTPUT_PATH   - log file that receives the reported events
#   EVENT_FILTERS - filter arguments for `docker events` (split on whitespace)
set -o errexit -o nounset -o pipefail

: "${NTFY_SCRIPT:=/mnt/user/services/homelab-infra/ops/restore-tests/send-ntfy.sh}"
: "${NTFY_TOPIC:=homelab-alerts}"
: "${SEND_NTFY:=1}"
: "${OUTPUT_PATH:=/mnt/user/services/posture-check/docker-critical-events-last.log}"
: "${EVENT_FILTERS:=--filter event=die --filter event=oom --filter event=kill}"

# The log directory must exist before the first event is appended.
mkdir -p "$(dirname "$OUTPUT_PATH")"
|
|
|
|
#######################################
# Print the string value stored under a key in a single-line JSON text.
#
# This is a small textual scan, not a JSON parser: it searches for the literal
# sequence "<key>":"<value>" at any nesting depth and prints <value>. When the
# key occurs more than once, the last occurrence wins. Only string values
# written without whitespace around the colon are recognised, and the value is
# cut at its first double quote (escaped quotes are not decoded).
#
# The key is compared literally, so keys containing ".", "/", "*" or other
# pattern characters can be looked up safely.
#
# Arguments: $1 - key name
#            $2 - JSON text (one line)
# Outputs:   the value followed by a newline; nothing when the key is absent
# Returns:   always 0, so a missing key never trips errexit in a caller
#######################################
json_value() {
  local key="$1"
  local json="$2"
  local quote='"'
  local needle="${quote}${key}${quote}:${quote}"
  local rest value

  # Key not present at all: print nothing.
  [[ "$json" == *"$needle"* ]] || return 0

  # Strip everything up to and including the last "<key>":" sequence.
  rest=${json##*"$needle"}

  # No closing quote means the value is unterminated; treat it as absent.
  [[ "$rest" == *"$quote"* ]] || return 0

  value=${rest%%"$quote"*}
  printf '%s\n' "$value"
}
|
|
|
|
#######################################
# Build the multi-line notification body for one Docker event.
#
# Globals:   OUTPUT_PATH (read) - named in the closing "logged in" line
# Arguments: $1 - event JSON line
# Outputs:   container, action, image, exit code and signal (one per line),
#            a blank line, then the log file location. Fields json_value
#            cannot find are shown as "unknown" or "n/a".
#######################################
event_summary() {
  local raw_event="$1"
  local container verb img rc sig

  container="$(json_value "name" "$raw_event")"
  verb="$(json_value "Action" "$raw_event")"
  img="$(json_value "image" "$raw_event")"
  rc="$(json_value "exitCode" "$raw_event")"
  sig="$(json_value "signal" "$raw_event")"

  # Substitute placeholders for anything the event did not carry.
  container="${container:-unknown}"
  verb="${verb:-unknown}"
  img="${img:-unknown}"
  rc="${rc:-n/a}"
  sig="${sig:-n/a}"

  printf 'Container: %s\nAction: %s\nImage: %s\nExit-Code: %s\nSignal: %s\n\nFull event logged in: %s\n' \
    "$container" "$verb" "$img" "$rc" "$sig" "$OUTPUT_PATH"
}
|
|
|
|
#######################################
# Build the one-line notification title for one Docker event.
#
# Arguments: $1 - event JSON line
# Outputs:   "Docker critical: <name> <action>", with " exit=<code>" appended
#            when the event carries an exit code. No trailing newline.
#######################################
event_title() {
  local raw_event="$1"
  local container verb rc

  container="$(json_value "name" "$raw_event")"
  verb="$(json_value "Action" "$raw_event")"
  rc="$(json_value "exitCode" "$raw_event")"

  # Events without an exit code get the short form.
  if [[ -z "$rc" ]]; then
    printf 'Docker critical: %s %s' "${container:-unknown}" "${verb:-event}"
    return
  fi

  printf 'Docker critical: %s %s exit=%s' "${container:-unknown}" "${verb:-event}" "$rc"
}
|
|
|
|
#######################################
# Decide whether an event is worth reporting.
#
# Arguments: $1 - event JSON line
# Returns:   0 for "oom" and "kill" actions, and for "die" unless its exit
#            code is exactly "0"; 1 for every other action.
#######################################
should_send_event() {
  local raw_event="$1"
  local verb rc

  verb="$(json_value "Action" "$raw_event")"
  rc="$(json_value "exitCode" "$raw_event")"

  if [[ "$verb" == "oom" || "$verb" == "kill" ]]; then
    return 0
  fi

  if [[ "$verb" != "die" ]]; then
    return 1
  fi

  # A "die" with exit code 0 is a clean stop; a missing code still reports.
  [[ "$rc" != "0" ]]
}
|
|
|
|
#######################################
# Log one event and, when enabled, forward it through the ntfy helper.
#
# Globals:   OUTPUT_PATH (read) - log file the raw event is appended to
#            SEND_NTFY (read)   - notification is attempted only when "1"
#            NTFY_SCRIPT (read) - helper run via bash
#            NTFY_TOPIC (read)  - first argument passed to the helper
# Arguments: $1 - event JSON line
# Outputs:   appends "<ISO timestamp> <event>" to OUTPUT_PATH; writes a
#            warning to stderr when the helper is missing or reports failure
# Returns:   0; notification problems are best-effort and never fatal
#######################################
send_event() {
  local line="$1"
  local title message
  local timestamp

  timestamp="$(date -Iseconds)"
  title="$(event_title "$line")"
  message="$(event_summary "$line")"

  # Plain append; no pipeline or extra process is needed for this.
  printf '%s %s\n' "$timestamp" "$line" >>"$OUTPUT_PATH"

  if [ "$SEND_NTFY" != "1" ]; then
    return 0
  fi

  # A wrong helper path would otherwise disable alerting without any trace.
  if [ ! -f "$NTFY_SCRIPT" ]; then
    printf 'ntfy script not found, notification skipped: %s\n' "$NTFY_SCRIPT" >&2 || true
    return 0
  fi

  # Delivery stays best-effort, but a failure is made visible on stderr.
  if ! bash "$NTFY_SCRIPT" "$NTFY_TOPIC" "$title" "$message" high; then
    printf 'ntfy delivery failed for: %s\n' "$title" >&2 || true
  fi
}
|
|
|
|
# Refuse to start when the docker CLI is not on PATH.
command -v docker >/dev/null 2>&1 || {
  echo "docker command not found" >&2
  exit 127
}

# Stream events as one JSON object per line and report the relevant ones.
# EVENT_FILTERS is left unquoted on purpose so it splits into separate
# arguments for `docker events`.
# shellcheck disable=SC2086
docker events $EVENT_FILTERS --format '{{json .}}' |
  while IFS= read -r event_line; do
    # Skip blank lines in the stream.
    if [ -z "$event_line" ]; then
      continue
    fi

    if should_send_event "$event_line"; then
      send_event "$event_line"
    fi
  done
|