Files
Micha 8095ab8b5d F-10: automated Authelia repo<->host drift check
New services/authelia-diff.sh compares the access_control: section of the
repo baseline against the live host configuration.yml. OIDC clients,
identity providers, and secret values stay out of scope by design.
Exit codes: 0 ok, 1 drift, 2 file missing, 3 section missing, 4 tool missing.

posture-check.sh gains check_authelia_config_drift, which calls the diff
script and reports drift as a warning (not critical). SKIP_AUTHELIA_DRIFT=1
opts out; AUTHELIA_DIFF_SCRIPT overrides the script path.

WORKFLOW.md gets a dedicated "Ausnahme: Authelia configuration.yml" section
analogous to the Traefik dynamic-config exception, with the mandatory
repo->host merge workflow and the env-variable contract.

Smoke-tested locally: identical files rc=0, ACL change rc=1 with proper
unified diff, non-ACL change (session.default_redirection_url) correctly
ignored.

Operator follow-up: set up a read-only repo mirror at
/mnt/user/services/homelab-infra/ so the check finds a current baseline.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 09:52:16 +02:00

433 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Posture check: runs a series of filesystem, NVMe and Authelia drift checks,
# writes the collected results as JSON to OUTPUT_PATH and optionally sends an
# ntfy notification. The script's exit status is whatever main returns:
# 0 when every check is ok, 1 when there are warnings, 2 when any check is
# critical.
set -euo pipefail

# --- Configuration (each value can be overridden from the environment) ------
# Destination of the JSON report.
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/last.json}"
# ntfy server and topics used for notifications; SEND_NTFY=1 enables sending.
NTFY_BASE_URL="${NTFY_BASE_URL:-https://ntfy.kaleschke.info}"
WARNING_TOPIC="${WARNING_TOPIC:-homelab-alerts}"
CRITICAL_TOPIC="${CRITICAL_TOPIC:-homelab-alerts}"
SEND_NTFY="${SEND_NTFY:-1}"
# Directory holding the per-run scratch results file.
TMP_DIR="${TMP_DIR:-/tmp/kallilab-posture-check}"
# Set to 1 to accept ntfs3/fuseblk on /mnt/disk1 (temporary migration exception).
ALLOW_DISK1_NTFS="${ALLOW_DISK1_NTFS:-0}"
# Alert de-duplication: state file and minimum seconds between repeats of an
# unchanged alert.
ALERT_STATE_PATH="${ALERT_STATE_PATH:-/mnt/user/services/posture-check/last-alert.state}"
ALERT_REPEAT_SECONDS="${ALERT_REPEAT_SECONDS:-86400}"
# Set SKIP_AUTHELIA_DRIFT=1 to skip the Authelia drift check;
# AUTHELIA_DIFF_SCRIPT is the path of the diff script it runs.
SKIP_AUTHELIA_DRIFT="${SKIP_AUTHELIA_DRIFT:-0}"
AUTHELIA_DIFF_SCRIPT="${AUTHELIA_DIFF_SCRIPT:-/mnt/user/services/homelab-infra/services/authelia-diff.sh}"

mkdir -p "$TMP_DIR"
# mktemp creates the (empty) results file exclusively under an unpredictable
# name, so a pre-planted file or symlink in the shared temp area cannot be
# reused or clobbered the way a PID-based name could.
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
# Remove this run's scratch results file. Registered as the EXIT trap right
# below, so it runs when the shell exits.
cleanup() { rm -f "$RESULTS_FILE"; }
trap 'cleanup' EXIT
# Escape stdin so it can be embedded inside a JSON string literal.
# Reads:   text on stdin
# Writes:  escaped text on stdout
# Handles backslash, double quote, tab, carriage return and embedded newlines;
# any other ASCII control character (which JSON forbids unescaped inside a
# string) is dropped so the generated document always stays parseable.
json_escape() {
  local tab=$'\t'
  local cr=$'\r'
  # The leading ':a / $!{N;ba / }' loop slurps all input into the pattern
  # space so embedded newlines can be rewritten as \n instead of leaking
  # through as raw line breaks. Backslashes are doubled first so the escapes
  # added afterwards are not escaped again.
  sed \
    -e ':a' -e '$!{N;ba' -e '}' \
    -e 's/\\/\\\\/g' \
    -e 's/"/\\"/g' \
    -e "s/${tab}/\\\\t/g" \
    -e "s/${cr}/\\\\r/g" \
    -e 's/\n/\\n/g' \
    | tr -d '\001-\010\013\014\016-\037'
}
# Append one check result to RESULTS_FILE as a tab-separated record.
# Arguments: $1 - severity (the script uses ok, warning, critical)
#            $2 - check name
#            $3 - human-readable message
# The results file is parsed line by line with tab as the field separator, so
# tabs, carriage returns and newlines inside a field (for example multi-line
# command output placed in a message) are flattened to spaces; otherwise a
# single result would be split into several broken records.
add_result() {
  local severity="$1"
  local name="$2"
  local message="$3"
  severity="${severity//[$'\t\r\n']/ }"
  name="${name//[$'\t\r\n']/ }"
  message="${message//[$'\t\r\n']/ }"
  printf '%s\t%s\t%s\n' "$severity" "$name" "$message" >> "$RESULTS_FILE"
}
# Verify that a command is available.
# Arguments: $1 - command name
# Returns:   0 when the command is found; otherwise records a warning result
#            named command_<name> and returns 1.
need_cmd() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1 && return 0
  add_result "warning" "command_$tool" "Command missing: $tool"
  return 1
}
# Compare the filesystem type reported by findmnt for a path with an
# expected type.
# Arguments: $1 - path to look up
#            $2 - expected filesystem type
#            $3 - severity to record on mismatch or when the mount is not found
#            $4 - result name
# A missing findmnt binary is always recorded as a warning.
check_fstype() {
  local mount_path="$1"
  local wanted="$2"
  local fail_severity="$3"
  local check_name="$4"
  local found

  command -v findmnt >/dev/null 2>&1 || {
    add_result "warning" "$check_name" "Cannot check $mount_path filesystem because findmnt is missing"
    return
  }

  found="$(findmnt -no FSTYPE "$mount_path" 2>/dev/null)" || {
    add_result "$fail_severity" "$check_name" "Mount not found: $mount_path"
    return
  }

  if [ "$found" != "$wanted" ]; then
    add_result "$fail_severity" "$check_name" "$mount_path filesystem is $found, expected $wanted"
    return
  fi
  add_result "ok" "$check_name" "$mount_path filesystem is $found"
}
# Check the filesystem type of /mnt/disk1.
# With ALLOW_DISK1_NTFS=1 the accepted types are ntfs3 and fuseblk and any
# other type is a warning; otherwise xfs is required and anything else is
# critical. A missing findmnt or a missing mount is recorded as a warning.
check_disk1_fstype() {
  local fs

  command -v findmnt >/dev/null 2>&1 || {
    add_result "warning" "disk1_fstype" "Cannot check /mnt/disk1 filesystem because findmnt is missing"
    return
  }

  fs="$(findmnt -no FSTYPE "/mnt/disk1" 2>/dev/null)" || {
    add_result "warning" "disk1_fstype" "Mount not found: /mnt/disk1"
    return
  }

  # Default policy: only xfs is acceptable.
  if [ "$ALLOW_DISK1_NTFS" != "1" ]; then
    case "$fs" in
      xfs)
        add_result "ok" "disk1_fstype" "/mnt/disk1 filesystem is $fs"
        ;;
      *)
        add_result "critical" "disk1_fstype" "/mnt/disk1 filesystem is $fs, expected xfs"
        ;;
    esac
    return
  fi

  # Temporary exception: NTFS-style filesystems are tolerated.
  case "$fs" in
    ntfs3 | fuseblk)
      add_result "ok" "disk1_fstype" "/mnt/disk1 filesystem is $fs; temporarily allowed until Disk1 phase 2 migration"
      ;;
    *)
      add_result "warning" "disk1_fstype" "/mnt/disk1 filesystem is $fs, expected ntfs3/fuseblk during temporary Disk1 migration exception"
      ;;
  esac
}
# Report NTFS-like filesystems (ntfs3/fuseblk) mounted at or below the core
# mount points /mnt/cache and /mnt/disk1.
# With ALLOW_DISK1_NTFS=1 only /mnt/cache is inspected.
# Records: critical when such a mount is found, ok otherwise, and a warning
# when findmnt is missing or fails.
check_no_ntfs_on_core_mounts() {
  local hits
  local mounts
  local pattern="^/mnt/(cache|disk1)(/|$)"
  if ! command -v findmnt >/dev/null 2>&1; then
    add_result "warning" "no_ntfs_core_mounts" "Cannot check NTFS mounts because findmnt is missing"
    return
  fi
  if [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    pattern="^/mnt/cache(/|$)"
  fi
  # Capture the mount table on its own and check its status explicitly. When
  # findmnt ran inside the filter pipeline, a failure combined with
  # `set -euo pipefail` aborted the whole script on the assignment, before
  # any report could be written.
  if ! mounts="$(findmnt -rn -o TARGET,FSTYPE 2>/dev/null)"; then
    add_result "warning" "no_ntfs_core_mounts" "Cannot check NTFS mounts because findmnt failed"
    return
  fi
  # Keep "target:fstype" for matching mounts and join them with commas.
  hits="$(printf '%s\n' "$mounts" | awk -v pattern="$pattern" '$1 ~ pattern && ($2 == "ntfs3" || $2 == "fuseblk") { print $1 ":" $2 }' | paste -sd ',' -)"
  if [ -n "$hits" ]; then
    add_result "critical" "no_ntfs_core_mounts" "NTFS-like filesystem on core mount: $hits"
  elif [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    add_result "ok" "no_ntfs_core_mounts" "No NTFS on /mnt/cache; /mnt/disk1 NTFS is temporarily allowed until Disk1 phase 2 migration"
  else
    add_result "ok" "no_ntfs_core_mounts" "No ntfs3/fuseblk mounts below /mnt/cache or /mnt/disk1"
  fi
}
# Check that the array-side appdata directory holds no entries.
# Arguments: $1 - directory to inspect (optional, default /mnt/disk1/appdata)
# Records: ok when the directory is absent or empty, critical when it
# contains at least one entry, warning when it cannot be listed.
check_mover_drift() {
  local path="${1:-/mnt/disk1/appdata}"
  local first_entry
  if [ ! -d "$path" ]; then
    add_result "ok" "mover_drift_appdata" "$path does not exist"
    return
  fi
  # -quit stops after the first entry; one hit is enough to know the
  # directory is not empty. A find failure (e.g. unreadable directory) is
  # reported instead of being mistaken for "empty".
  if ! first_entry="$(find "$path" -mindepth 1 -print -quit 2>/dev/null)"; then
    add_result "warning" "mover_drift_appdata" "Cannot inspect $path"
    return
  fi
  if [ -n "$first_entry" ]; then
    add_result "critical" "mover_drift_appdata" "$path contains entries; appdata should stay cache-only"
  else
    add_result "ok" "mover_drift_appdata" "$path is empty"
  fi
}
# Compare the inode usage percentage of a path against a threshold.
# Arguments: $1 - path handed to df
#            $2 - threshold in percent; usage at or above it is a warning
#            $3 - result name
# A value that is not a plain integer (empty, "-", ...) is recorded as a
# warning stating that the usage is unavailable.
check_inode_usage() {
  local target="$1"
  local limit="$2"
  local check_name="$3"
  local pct

  # Second line of `df -Pi` output, fifth column, percent sign removed.
  pct="$(df -Pi "$target" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')" || {
    add_result "warning" "$check_name" "Cannot read inode usage for $target"
    return
  }

  if [[ ! "$pct" =~ ^[0-9]+$ ]]; then
    add_result "warning" "$check_name" "$target inode usage unavailable (${pct:-unknown})"
    return
  fi

  if [ "$pct" -lt "$limit" ]; then
    add_result "ok" "$check_name" "$target inode usage ${pct}%"
    return
  fi
  add_result "warning" "$check_name" "$target inode usage ${pct}% >= ${limit}%"
}
# Inode usage check for /mnt/disk1 with an 80% threshold.
# With ALLOW_DISK1_NTFS=1 the check is not run and an ok result explaining
# the skip is recorded instead.
check_disk1_inode_usage() {
  case "$ALLOW_DISK1_NTFS" in
    1)
      add_result "ok" "disk1_inode_usage" "/mnt/disk1 inode usage skipped; NTFS transition filesystem does not expose POSIX inode usage"
      ;;
    *)
      check_inode_usage "/mnt/disk1" 80 "disk1_inode_usage"
      ;;
  esac
}
# Compare the space usage percentage of a path against a threshold.
# Arguments: $1 - path handed to df
#            $2 - threshold in percent; usage at or above it is a finding
#            $3 - result name
#            $4 - severity recorded when the threshold is reached
# A value that is not a plain integer (empty, "-", ...) is recorded as a
# warning, mirroring check_inode_usage. Without this guard the numeric test
# below errors out and falls into the else branch, producing a bogus
# "usage -% >= N%" finding at the caller's severity.
check_filesystem_usage() {
  local path="$1"
  local max_percent="$2"
  local name="$3"
  local severity="$4"
  local use_percent
  # Second line of `df -P` output, fifth column, percent sign removed.
  if ! use_percent="$(df -P "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')"; then
    add_result "warning" "$name" "Cannot read filesystem usage for $path"
    return
  fi
  if ! printf '%s' "$use_percent" | grep -Eq '^[0-9]+$'; then
    add_result "warning" "$name" "$path usage unavailable (${use_percent:-unknown})"
    return
  fi
  if [ "$use_percent" -lt "$max_percent" ]; then
    add_result "ok" "$name" "$path usage ${use_percent}%"
  else
    add_result "$severity" "$name" "$path usage ${use_percent}% >= ${max_percent}%"
  fi
}
# Evaluate three fields of `nvme smart-log` for the device named by
# NVME_DEVICE (default /dev/nvme0n1):
#   critical_warning - ok when 0 or 0x00 (or absent), critical otherwise
#   percentage_used  - ok when present and below 80, critical otherwise
#   media_errors     - ok when 0 (or absent), warning otherwise
# Returns without further results when the nvme command is unavailable
# (need_cmd records that); records a critical result when the smart-log
# cannot be read.
check_nvme_smart() {
  local device="${NVME_DEVICE:-/dev/nvme0n1}"
  local smart warning percentage_used media_errors

  need_cmd nvme || return 0

  smart="$(nvme smart-log "$device" 2>/dev/null)" || {
    add_result "critical" "nvme_smart" "Cannot read nvme smart-log for $device"
    return
  }

  # Each field is the text after the first ':' on the first matching line.
  warning="$(awk -F: '/critical_warning/ { gsub(/[[:space:]]/, "", $2); print $2; exit }' <<<"$smart")"
  percentage_used="$(awk -F: '/percentage_used/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<<"$smart")"
  media_errors="$(awk -F: '/media_errors/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<<"$smart")"

  case "${warning:-0}" in
    0 | 0x00)
      add_result "ok" "nvme_critical_warning" "$device critical_warning ${warning:-0}"
      ;;
    *)
      add_result "critical" "nvme_critical_warning" "$device critical_warning ${warning}"
      ;;
  esac

  if [ -n "${percentage_used:-}" ] && [ "$percentage_used" -lt 80 ]; then
    add_result "ok" "nvme_percentage_used" "$device percentage_used ${percentage_used}%"
  else
    add_result "critical" "nvme_percentage_used" "$device percentage_used ${percentage_used:-unknown}, expected <80"
  fi

  case "${media_errors:-0}" in
    0)
      add_result "ok" "nvme_media_errors" "$device media_errors 0"
      ;;
    *)
      add_result "warning" "nvme_media_errors" "$device media_errors ${media_errors}"
      ;;
  esac
}
# Run the Authelia diff script and translate its exit code into a result
# named authelia_config_drift.
# Globals:  SKIP_AUTHELIA_DRIFT (1 skips the check), AUTHELIA_DIFF_SCRIPT
# Exit codes handled: 0 match, 1 drift, 2 aborted, 3 section missing,
# 4 required tool missing; anything else is reported as unexpected.
# Every non-zero outcome is recorded as a warning, never as critical.
check_authelia_config_drift() {
  local output
  local rc=0
  if [ "$SKIP_AUTHELIA_DRIFT" = "1" ]; then
    add_result "ok" "authelia_config_drift" "Authelia drift check skipped via SKIP_AUTHELIA_DRIFT=1"
    return
  fi
  # The script is run through bash, so it only has to be a regular file; the
  # executable bit is irrelevant.
  if [ ! -f "$AUTHELIA_DIFF_SCRIPT" ]; then
    add_result "warning" "authelia_config_drift" "Authelia diff script missing: $AUTHELIA_DIFF_SCRIPT"
    return
  fi
  # Capture the status with `|| rc=$?`: under `set -e` a bare assignment from
  # a failing command substitution terminates the whole script, so any
  # non-zero exit (including plain drift) would never reach the case below.
  output="$(bash "$AUTHELIA_DIFF_SCRIPT" 2>&1)" || rc=$?
  # Results are stored one per line; keep the captured output on one line.
  output="${output//[$'\t\r\n']/ }"
  case "$rc" in
    0)
      add_result "ok" "authelia_config_drift" "Authelia repo baseline matches host config (access_control)"
      ;;
    1)
      add_result "warning" "authelia_config_drift" "Authelia repo<->host drift in access_control; run authelia-diff.sh for details"
      ;;
    2)
      add_result "warning" "authelia_config_drift" "Authelia diff aborted: $output"
      ;;
    3)
      add_result "warning" "authelia_config_drift" "Authelia diff: section missing in repo or host: $output"
      ;;
    4)
      add_result "warning" "authelia_config_drift" "Authelia diff: required tool missing: $output"
      ;;
    *)
      add_result "warning" "authelia_config_drift" "Authelia diff returned unexpected rc=$rc: $output"
      ;;
  esac
}
# Post a notification body to "$NTFY_BASE_URL/<topic>" with curl.
# Arguments: $1 - severity, used in the Title header
#            $2 - topic appended to NTFY_BASE_URL
#            $3 - message body, sent via stdin
# Does nothing unless SEND_NTFY is 1 and curl is available. Delivery is best
# effort: a curl failure is ignored and the function still succeeds.
send_ntfy() {
  local level="$1"
  local channel="$2"
  local text="$3"

  [ "$SEND_NTFY" = "1" ] || return 0
  command -v curl >/dev/null 2>&1 || return 0

  printf '%s\n' "$text" \
    | curl -fsS \
      -H "Title: KalliLab posture-check $level" \
      -H "Priority: high" \
      --data-binary @- \
      "$NTFY_BASE_URL/$channel" >/dev/null \
    || true
}
# Print a fingerprint of all non-ok results in RESULTS_FILE.
# The non-ok records are rendered as "severity|name|message" lines, fed to
# cksum, and the first two cksum output fields are printed joined by ':'.
alert_fingerprint() {
  awk -F '\t' '$1 != "ok" { print $1 "|" $2 "|" $3 }' "$RESULTS_FILE" \
    | cksum \
    | awk '{ printf "%s:%s\n", $1, $2 }'
}
# Print the non-ok results in RESULTS_FILE as "severity:name" pairs joined
# by "; ". Prints nothing when every result is ok.
alert_summary() {
  # Emit the separator before every entry except the first, so no trailing
  # "; " is ever produced.
  awk -F '\t' '$1 != "ok" { printf "%s%s:%s", sep, $1, $2; sep = "; " }' "$RESULTS_FILE"
}
# Decide whether an alert with the given fingerprint should be sent now.
# Arguments: $1 - fingerprint of the current non-ok results
# Returns:   0 (send) when the fingerprint differs from the one stored in
#            ALERT_STATE_PATH, when the stored timestamp is not a number, or
#            when at least ALERT_REPEAT_SECONDS have passed since it;
#            1 (suppress) otherwise.
# Side effect: a non-numeric ALERT_REPEAT_SECONDS is reset to 86400.
should_send_alert() {
  local fingerprint="$1"
  local now
  local stored_fp=""
  local stored_at="0"

  now="$(date +%s)"

  [[ "$ALERT_REPEAT_SECONDS" =~ ^[0-9]+$ ]] || ALERT_REPEAT_SECONDS=86400

  # State file layout: "<fingerprint><TAB><epoch seconds>".
  if [ -f "$ALERT_STATE_PATH" ]; then
    IFS=$'\t' read -r stored_fp stored_at < "$ALERT_STATE_PATH" || true
  fi

  [ "$fingerprint" = "$stored_fp" ] || return 0
  [[ "$stored_at" =~ ^[0-9]+$ ]] || return 0

  if [ $((now - stored_at)) -ge "$ALERT_REPEAT_SECONDS" ]; then
    return 0
  fi
  return 1
}
# Persist "<fingerprint><TAB><current epoch seconds>" to ALERT_STATE_PATH.
# Arguments: $1 - fingerprint of the alert that was just sent
# The record is written to a ".tmp" sibling first and then moved into place.
remember_alert() {
  local fingerprint="$1"
  local stamp
  local state_dir
  local staging="$ALERT_STATE_PATH.tmp"

  stamp="$(date +%s)"
  state_dir="$(dirname "$ALERT_STATE_PATH")"

  mkdir -p "$state_dir"
  printf '%s\t%s\n' "$fingerprint" "$stamp" > "$staging"
  mv "$staging" "$ALERT_STATE_PATH"
}
# Forget the last sent alert: remove the state file and any leftover ".tmp"
# staging file next to it.
clear_alert_state() {
  local -a stale=("$ALERT_STATE_PATH" "$ALERT_STATE_PATH.tmp")
  rm -f "${stale[@]}"
}
# Send an alert unless an identical one was sent recently.
# Arguments: $1 - severity, $2 - ntfy topic, $3 - message body
# The summary of non-ok checks is appended to the body when it is not empty.
# should_send_alert decides whether to send; after sending, the fingerprint
# is stored via remember_alert.
send_alert_once() {
  local level="$1"
  local channel="$2"
  local text="$3"
  local fp
  local checks

  fp="$(alert_fingerprint)"
  checks="$(alert_summary)"

  [ -z "$checks" ] || text="$text Checks: $checks"

  should_send_alert "$fp" || return 0

  send_ntfy "$level" "$channel" "$text"
  remember_alert "$fp"
}
# Render RESULTS_FILE as a JSON report, print it, and trigger alerting.
# Globals:  RESULTS_FILE (read), OUTPUT_PATH (written via a ".tmp" sibling
#           that is then moved into place), CRITICAL_TOPIC, WARNING_TOPIC
# Outputs:  the finished report on stdout
# Returns:  2 when at least one result is critical, 1 when there are only
#           warnings; otherwise the alert state is cleared and the status of
#           clear_alert_state is returned.
write_json() {
  local timestamp
  local critical_count
  local warning_count
  local status
  local first=1
  # Loop variables of the read below; declared local so they do not leak
  # into (or clobber) the caller's scope.
  local severity
  local name
  local message
  timestamp="$(date -Iseconds)"
  critical_count="$(awk -F '\t' '$1 == "critical" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
  warning_count="$(awk -F '\t' '$1 == "warning" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
  # Overall status is the worst severity seen.
  if [ "$critical_count" -gt 0 ]; then
    status="critical"
  elif [ "$warning_count" -gt 0 ]; then
    status="warning"
  else
    status="ok"
  fi
  mkdir -p "$(dirname "$OUTPUT_PATH")"
  {
    printf '{\n'
    printf ' "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
    printf ' "status": "%s",\n' "$status"
    printf ' "critical_count": %s,\n' "$critical_count"
    printf ' "warning_count": %s,\n' "$warning_count"
    printf ' "checks": [\n'
    while IFS="$(printf '\t')" read -r severity name message; do
      # Separator goes before every element except the first, so the array
      # never ends with a trailing comma.
      if [ "$first" -eq 0 ]; then
        printf ',\n'
      fi
      first=0
      printf ' {"severity":"%s","name":"%s","message":"%s"}' \
        "$(printf '%s' "$severity" | json_escape)" \
        "$(printf '%s' "$name" | json_escape)" \
        "$(printf '%s' "$message" | json_escape)"
    done < "$RESULTS_FILE"
    printf '\n ]\n'
    printf '}\n'
  } > "$OUTPUT_PATH.tmp"
  mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
  cat "$OUTPUT_PATH"
  if [ "$status" = "critical" ]; then
    send_alert_once "critical" "$CRITICAL_TOPIC" "Posture-check critical: $critical_count critical, $warning_count warning. See $OUTPUT_PATH"
    return 2
  fi
  if [ "$status" = "warning" ]; then
    send_alert_once "warning" "$WARNING_TOPIC" "Posture-check warning: $warning_count warning. See $OUTPUT_PATH"
    return 1
  fi
  clear_alert_state
}
# Run every posture check in order and finish by writing the report.
# Returns the status of write_json, which is the last command.
main() {
  # Loop variables are local so they do not leak into the global scope.
  local tool
  local share

  # A missing tool is recorded by need_cmd; `|| true` keeps its non-zero
  # status from ending the run so the remaining checks still execute.
  for tool in findmnt df awk; do
    need_cmd "$tool" || true
  done

  check_fstype "/mnt/cache" "xfs" "critical" "cache_fstype"
  check_disk1_fstype
  check_no_ntfs_on_core_mounts
  check_mover_drift
  check_inode_usage "/mnt/cache" 80 "cache_inode_usage"
  check_disk1_inode_usage
  check_filesystem_usage "/mnt/cache" 70 "cache_fill_level" "warning"

  for share in appdata system domains; do
    if [ -e "/mnt/user/$share" ]; then
      check_filesystem_usage "/mnt/user/$share" 70 "share_${share}_fill_level" "warning"
    else
      add_result "warning" "share_${share}_fill_level" "/mnt/user/$share missing"
    fi
  done

  check_nvme_smart
  check_authelia_config_drift
  write_json
}
# Entry point: forward the script's command-line arguments to main (main
# itself does not read them). The script's exit status is main's status.
main "$@"