8095ab8b5d
New services/authelia-diff.sh compares the access_control: section of the repo baseline against the live host configuration.yml. OIDC clients, identity providers, and secret values stay out of scope by design. Exit codes: 0 ok, 1 drift, 2 file missing, 3 section missing, 4 tool missing. posture-check.sh gains check_authelia_config_drift, which calls the diff script and reports drift as warning (not critical). SKIP_AUTHELIA_DRIFT=1 opts out; AUTHELIA_DIFF_SCRIPT overrides the path. WORKFLOW.md gets a dedicated "Ausnahme: Authelia configuration.yml" section analogous to the Traefik dynamic-config exception, with the mandatory repo->host merge workflow and the env-variable contract. Smoke-tested locally: identical files rc=0, ACL change rc=1 with proper unified diff, non-ACL change (session.default_redirection_url) correctly ignored. Operator follow-up: set up a read-only repo mirror at /mnt/user/services/homelab-infra/ so the check finds a current baseline. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
433 lines
12 KiB
Bash
Executable File
433 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# posture-check: runs filesystem, fill-level, NVMe SMART and Authelia drift
# checks, writes the results as JSON to OUTPUT_PATH and optionally sends an
# ntfy notification when a check is not "ok".
#
# Every setting below can be overridden through the environment.

set -euo pipefail

# Destination of the JSON report.
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/last.json}"

# ntfy server and topics used for notifications; SEND_NTFY=1 enables sending.
NTFY_BASE_URL="${NTFY_BASE_URL:-https://ntfy.kaleschke.info}"
WARNING_TOPIC="${WARNING_TOPIC:-homelab-alerts}"
CRITICAL_TOPIC="${CRITICAL_TOPIC:-homelab-alerts}"
SEND_NTFY="${SEND_NTFY:-1}"

# Scratch directory for the per-run results file.
TMP_DIR="${TMP_DIR:-/tmp/kallilab-posture-check}"

# 1 = accept ntfs3/fuseblk on /mnt/disk1 (temporary migration exception).
ALLOW_DISK1_NTFS="${ALLOW_DISK1_NTFS:-0}"

# Alert de-duplication: last sent fingerprint/timestamp and the repeat interval.
ALERT_STATE_PATH="${ALERT_STATE_PATH:-/mnt/user/services/posture-check/last-alert.state}"
ALERT_REPEAT_SECONDS="${ALERT_REPEAT_SECONDS:-86400}"

# Authelia drift check: 1 skips it; the script path can be overridden.
SKIP_AUTHELIA_DRIFT="${SKIP_AUTHELIA_DRIFT:-0}"
AUTHELIA_DIFF_SCRIPT="${AUTHELIA_DIFF_SCRIPT:-/mnt/user/services/homelab-infra/services/authelia-diff.sh}"

mkdir -p "$TMP_DIR"

# mktemp creates an empty, uniquely named file, so the name cannot be guessed
# in advance (unlike a PID-based name in a shared temp directory).
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"

# Remove the per-run results file. Registered below as the EXIT trap so it
# runs whenever the script exits.
cleanup() {
  rm -f -- "$RESULTS_FILE"
}

trap cleanup EXIT

# Escape text read from stdin for use inside a JSON string literal.
# Handles backslash, double quote, tab, carriage return and newline; other
# control characters are passed through unchanged.
# Trailing newlines of the input are dropped; the result is printed followed
# by a single newline.
json_escape() {
  local text
  local backslash='\'
  local dquote='"'
  local tab=$'\t'
  local cr=$'\r'
  local newline=$'\n'

  text="$(cat)"

  # Backslashes first, so the escapes added below are not escaped again.
  text=${text//"$backslash"/"$backslash$backslash"}
  text=${text//"$dquote"/"$backslash$dquote"}
  text=${text//"$tab"/"${backslash}t"}
  text=${text//"$cr"/"${backslash}r"}
  text=${text//"$newline"/"${backslash}n"}

  printf '%s\n' "$text"
}

# Append one check result to RESULTS_FILE as a tab-separated line.
# Arguments:
#   $1 - severity (the rest of the script uses ok, warning, critical)
#   $2 - check name
#   $3 - human-readable message
add_result() {
  local severity="$1"
  local name="$2"
  local message="$3"
  local tab=$'\t'
  local cr=$'\r'
  local newline=$'\n'

  # The results file is one record per line with tab-separated fields, so a
  # tab or line break inside the message (e.g. captured command output) would
  # split it into bogus records. Flatten them to spaces.
  message=${message//"$newline"/ }
  message=${message//"$cr"/ }
  message=${message//"$tab"/ }

  printf '%s\t%s\t%s\n' "$severity" "$name" "$message" >> "$RESULTS_FILE"
}

# Check that a command is available.
# Arguments:
#   $1 - command name
# Returns: 0 if the command resolves; otherwise records a warning result
#          named "command_<name>" and returns 1.
need_cmd() {
  local cmd="$1"

  if command -v "$cmd" >/dev/null 2>&1; then
    return 0
  fi

  add_result "warning" "command_${cmd}" "Command missing: ${cmd}"
  return 1
}

# Compare the filesystem type of a mount against an expected type.
# Arguments:
#   $1 - mount path passed to findmnt
#   $2 - expected filesystem type
#   $3 - severity recorded when the mount is missing or the type differs
#   $4 - check name used for the recorded result
check_fstype() {
  local path="$1"
  local expected="$2"
  local severity="$3"
  local name="$4"
  local actual

  if ! command -v findmnt >/dev/null 2>&1; then
    add_result "warning" "$name" "Cannot check $path filesystem because findmnt is missing"
    return
  fi

  # Assigning inside the condition keeps a findmnt failure from tripping set -e.
  if ! actual="$(findmnt -no FSTYPE "$path" 2>/dev/null)"; then
    add_result "$severity" "$name" "Mount not found: $path"
    return
  fi

  if [ "$actual" = "$expected" ]; then
    add_result "ok" "$name" "$path filesystem is $actual"
  else
    add_result "$severity" "$name" "$path filesystem is $actual, expected $expected"
  fi
}

# Check the filesystem type of /mnt/disk1.
# With ALLOW_DISK1_NTFS=1, ntfs3/fuseblk are accepted and anything else is a
# warning; otherwise xfs is required and anything else is critical.
check_disk1_fstype() {
  local actual

  if ! command -v findmnt >/dev/null 2>&1; then
    add_result "warning" "disk1_fstype" "Cannot check /mnt/disk1 filesystem because findmnt is missing"
    return
  fi

  if ! actual="$(findmnt -no FSTYPE "/mnt/disk1" 2>/dev/null)"; then
    add_result "warning" "disk1_fstype" "Mount not found: /mnt/disk1"
    return
  fi

  if [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    case "$actual" in
      ntfs3 | fuseblk)
        add_result "ok" "disk1_fstype" "/mnt/disk1 filesystem is $actual; temporarily allowed until Disk1 phase 2 migration"
        ;;
      *)
        add_result "warning" "disk1_fstype" "/mnt/disk1 filesystem is $actual, expected ntfs3/fuseblk during temporary Disk1 migration exception"
        ;;
    esac
    return
  fi

  if [ "$actual" = "xfs" ]; then
    add_result "ok" "disk1_fstype" "/mnt/disk1 filesystem is $actual"
  else
    add_result "critical" "disk1_fstype" "/mnt/disk1 filesystem is $actual, expected xfs"
  fi
}

# Report a critical result when any mount at or below /mnt/cache or
# /mnt/disk1 uses ntfs3 or fuseblk. With ALLOW_DISK1_NTFS=1 only /mnt/cache
# is inspected.
check_no_ntfs_on_core_mounts() {
  local hits
  local mounts
  local pattern="^/mnt/(cache|disk1)(/|$)"

  if ! command -v findmnt >/dev/null 2>&1; then
    add_result "warning" "no_ntfs_core_mounts" "Cannot check NTFS mounts because findmnt is missing"
    return
  fi

  if [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    pattern="^/mnt/cache(/|$)"
  fi

  # List mounts in a guarded step: a failing findmnt inside the pipeline below
  # would otherwise abort the whole script under `set -e` / `pipefail`.
  if ! mounts="$(findmnt -rn -o TARGET,FSTYPE 2>/dev/null)"; then
    add_result "warning" "no_ntfs_core_mounts" "Cannot list mounts via findmnt"
    return
  fi

  hits="$(
    printf '%s\n' "$mounts" \
      | awk -v pattern="$pattern" '$1 ~ pattern && ($2 == "ntfs3" || $2 == "fuseblk") { print $1 ":" $2 }' \
      | paste -sd ',' -
  )"

  if [ -n "$hits" ]; then
    add_result "critical" "no_ntfs_core_mounts" "NTFS-like filesystem on core mount: $hits"
  elif [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    add_result "ok" "no_ntfs_core_mounts" "No NTFS on /mnt/cache; /mnt/disk1 NTFS is temporarily allowed until Disk1 phase 2 migration"
  else
    add_result "ok" "no_ntfs_core_mounts" "No ntfs3/fuseblk mounts below /mnt/cache or /mnt/disk1"
  fi
}

# Verify that the array-side appdata directory holds no entries.
# Arguments:
#   $1 - directory to inspect (optional, default /mnt/disk1/appdata)
# Records ok when the directory is absent or empty, critical when it
# contains at least one entry, and warning when it cannot be inspected.
check_mover_drift() {
  local path="${1:-/mnt/disk1/appdata}"
  local first_entry
  local find_rc=0

  if [ ! -d "$path" ]; then
    add_result "ok" "mover_drift_appdata" "$path does not exist"
    return
  fi

  # -quit stops after the first entry; one hit is enough to prove drift.
  first_entry="$(find "$path" -mindepth 1 -print -quit)" || find_rc=$?

  if [ -n "$first_entry" ]; then
    add_result "critical" "mover_drift_appdata" "$path contains entries; appdata should stay cache-only"
  elif [ "$find_rc" -ne 0 ]; then
    # Do not report "empty" when find itself failed (e.g. unreadable directory).
    add_result "warning" "mover_drift_appdata" "Cannot inspect $path (find exited with status $find_rc)"
  else
    add_result "ok" "mover_drift_appdata" "$path is empty"
  fi
}

# Check inode usage of the filesystem containing a path.
# Arguments:
#   $1 - path handed to `df -Pi`
#   $2 - threshold in percent; usage at or above it is a warning
#   $3 - check name used for the recorded result
check_inode_usage() {
  local path="$1"
  local max_percent="$2"
  local name="$3"
  local use_percent
  local digits_re='^[0-9]+$'

  # Column 5 of the second `df -Pi` line is the usage percentage.
  if ! use_percent="$(df -Pi "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')"; then
    add_result "warning" "$name" "Cannot read inode usage for $path"
    return
  fi

  # Anything that is not a plain number cannot be compared below.
  if ! [[ "$use_percent" =~ $digits_re ]]; then
    add_result "warning" "$name" "$path inode usage unavailable (${use_percent:-unknown})"
    return
  fi

  if [ "$use_percent" -lt "$max_percent" ]; then
    add_result "ok" "$name" "$path inode usage ${use_percent}%"
  else
    add_result "warning" "$name" "$path inode usage ${use_percent}% >= ${max_percent}%"
  fi
}

# Inode usage check for /mnt/disk1 with an 80% threshold.
# Skipped (recorded as ok) when ALLOW_DISK1_NTFS=1.
check_disk1_inode_usage() {
  if [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    add_result "ok" "disk1_inode_usage" "/mnt/disk1 inode usage skipped; NTFS transition filesystem does not expose POSIX inode usage"
    return
  fi

  check_inode_usage "/mnt/disk1" 80 "disk1_inode_usage"
}

# Check the fill level of the filesystem containing a path.
# Arguments:
#   $1 - path handed to `df -P`
#   $2 - threshold in percent; usage at or above it is reported
#   $3 - check name used for the recorded result
#   $4 - severity recorded when the threshold is reached
check_filesystem_usage() {
  local path="$1"
  local max_percent="$2"
  local name="$3"
  local severity="$4"
  local use_percent
  local digits_re='^[0-9]+$'

  # Column 5 of the second `df -P` line is the usage percentage.
  if ! use_percent="$(df -P "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')"; then
    add_result "warning" "$name" "Cannot read filesystem usage for $path"
    return
  fi

  # Same guard as check_inode_usage: an empty or non-numeric value (df may
  # print "-") would make the numeric test below error out and fall through
  # to the alert branch with a meaningless percentage.
  if ! [[ "$use_percent" =~ $digits_re ]]; then
    add_result "warning" "$name" "$path usage unavailable (${use_percent:-unknown})"
    return
  fi

  if [ "$use_percent" -lt "$max_percent" ]; then
    add_result "ok" "$name" "$path usage ${use_percent}%"
  else
    add_result "$severity" "$name" "$path usage ${use_percent}% >= ${max_percent}%"
  fi
}

# Evaluate `nvme smart-log` for NVME_DEVICE (default /dev/nvme0n1).
# Records three results:
#   nvme_critical_warning - critical unless the value is zero or absent
#   nvme_percentage_used  - critical unless a value below 80 was parsed
#   nvme_media_errors     - warning unless the value is 0 or absent
# If the nvme command is missing only need_cmd's warning is recorded; if the
# smart-log cannot be read a single critical "nvme_smart" result is recorded.
check_nvme_smart() {
  local device="${NVME_DEVICE:-/dev/nvme0n1}"
  local smart
  local warning
  local percentage_used
  local media_errors
  # Matches zero in decimal or hex notation: 0, 00, 0x0, 0x00, ...
  local zero_re='^(0x)?0+$'

  if ! need_cmd nvme; then
    return
  fi

  if ! smart="$(nvme smart-log "$device" 2>/dev/null)"; then
    add_result "critical" "nvme_smart" "Cannot read nvme smart-log for $device"
    return
  fi

  # Each field is taken from the first "key : value" line mentioning the key.
  warning="$(printf '%s\n' "$smart" | awk -F: '/critical_warning/ { gsub(/[[:space:]]/, "", $2); print $2; exit }')"
  percentage_used="$(printf '%s\n' "$smart" | awk -F: '/percentage_used/ { gsub(/[^0-9]/, "", $2); print $2; exit }')"
  media_errors="$(printf '%s\n' "$smart" | awk -F: '/media_errors/ { gsub(/[^0-9]/, "", $2); print $2; exit }')"

  if [[ "${warning:-0}" =~ $zero_re ]]; then
    add_result "ok" "nvme_critical_warning" "$device critical_warning ${warning:-0}"
  else
    add_result "critical" "nvme_critical_warning" "$device critical_warning ${warning}"
  fi

  if [ -n "${percentage_used:-}" ] && [ "$percentage_used" -lt 80 ]; then
    add_result "ok" "nvme_percentage_used" "$device percentage_used ${percentage_used}%"
  else
    add_result "critical" "nvme_percentage_used" "$device percentage_used ${percentage_used:-unknown}, expected <80"
  fi

  if [ "${media_errors:-0}" = "0" ]; then
    add_result "ok" "nvme_media_errors" "$device media_errors 0"
  else
    add_result "warning" "nvme_media_errors" "$device media_errors ${media_errors}"
  fi
}

# Run the Authelia diff script and translate its exit code into a result.
# Globals read: SKIP_AUTHELIA_DRIFT (1 = skip), AUTHELIA_DIFF_SCRIPT (path).
# Exit codes handled: 0 match, 1 drift, 2 aborted, 3 section missing,
# 4 required tool missing; anything else is reported as unexpected.
# Every non-zero outcome is recorded as a warning.
check_authelia_config_drift() {
  local output
  local rc=0
  local newline=$'\n'

  if [ "$SKIP_AUTHELIA_DRIFT" = "1" ]; then
    add_result "ok" "authelia_config_drift" "Authelia drift check skipped via SKIP_AUTHELIA_DRIFT=1"
    return
  fi

  # The script is run through bash, so it has to be a file but need not be
  # executable.
  if [ ! -f "$AUTHELIA_DIFF_SCRIPT" ]; then
    add_result "warning" "authelia_config_drift" "Authelia diff script missing: $AUTHELIA_DIFF_SCRIPT"
    return
  fi

  # Capture the status with `|| rc=$?`: a bare assignment followed by
  # `rc=$?` would abort the whole script under `set -e` as soon as the diff
  # script exits non-zero, i.e. exactly when there is something to report.
  output="$(bash "$AUTHELIA_DIFF_SCRIPT" 2>&1)" || rc=$?

  # Results are stored one per line; keep multi-line output on a single line.
  output=${output//"$newline"/ }

  case "$rc" in
    0)
      add_result "ok" "authelia_config_drift" "Authelia repo baseline matches host config (access_control)"
      ;;
    1)
      add_result "warning" "authelia_config_drift" "Authelia repo<->host drift in access_control; run authelia-diff.sh for details"
      ;;
    2)
      add_result "warning" "authelia_config_drift" "Authelia diff aborted: $output"
      ;;
    3)
      add_result "warning" "authelia_config_drift" "Authelia diff: section missing in repo or host: $output"
      ;;
    4)
      add_result "warning" "authelia_config_drift" "Authelia diff: required tool missing: $output"
      ;;
    *)
      add_result "warning" "authelia_config_drift" "Authelia diff returned unexpected rc=$rc: $output"
      ;;
  esac
}

# Post a notification to "$NTFY_BASE_URL/<topic>" with curl.
# Arguments:
#   $1 - severity, shown in the notification title
#   $2 - topic
#   $3 - message body
# Does nothing unless SEND_NTFY=1 and curl is available. Delivery is best
# effort: a curl failure never fails the caller.
# NTFY_MAX_TIME (seconds, default 30) bounds the whole request so that an
# unresponsive server cannot hang the run.
send_ntfy() {
  local severity="$1"
  local topic="$2"
  local body="$3"

  if [ "$SEND_NTFY" != "1" ]; then
    return
  fi

  if ! command -v curl >/dev/null 2>&1; then
    return
  fi

  printf '%s\n' "$body" \
    | curl -fsS \
      --max-time "${NTFY_MAX_TIME:-30}" \
      -H "Title: KalliLab posture-check $severity" \
      -H "Priority: high" \
      --data-binary @- \
      "$NTFY_BASE_URL/$topic" >/dev/null || true
}

# Print a fingerprint of all non-ok results in RESULTS_FILE as
# "<checksum>:<byte count>", computed with cksum over "severity|name|message"
# lines.
alert_fingerprint() {
  awk -F '\t' '$1 != "ok" { printf "%s|%s|%s\n", $1, $2, $3 }' "$RESULTS_FILE" \
    | cksum \
    | awk '{ print $1 ":" $2 }'
}

# Print all non-ok results in RESULTS_FILE as "severity:name" pairs joined
# by "; ". Prints nothing when every result is ok.
alert_summary() {
  awk -F '\t' '
    $1 != "ok" {
      printf "%s%s:%s", sep, $1, $2
      sep = "; "
    }
  ' "$RESULTS_FILE"
}

# Decide whether an alert with the given fingerprint should be sent now.
# Arguments:
#   $1 - fingerprint of the current non-ok results
# Returns 0 (send) when the fingerprint differs from the one stored in
# ALERT_STATE_PATH, when the stored timestamp is unusable, or when at least
# ALERT_REPEAT_SECONDS have passed since it; returns 1 otherwise.
# Side effect: a non-numeric ALERT_REPEAT_SECONDS is reset to 86400.
should_send_alert() {
  local fingerprint="$1"
  local now
  local last_fingerprint=""
  local last_sent="0"
  local digits_re='^[0-9]+$'

  now="$(date +%s)"

  if ! [[ "$ALERT_REPEAT_SECONDS" =~ $digits_re ]]; then
    ALERT_REPEAT_SECONDS=86400
  fi

  # State file format: "<fingerprint><TAB><epoch seconds>".
  if [ -f "$ALERT_STATE_PATH" ]; then
    IFS=$'\t' read -r last_fingerprint last_sent < "$ALERT_STATE_PATH" || true
  fi

  if [ "$fingerprint" != "$last_fingerprint" ]; then
    return 0
  fi

  if ! [[ "$last_sent" =~ $digits_re ]]; then
    return 0
  fi

  # 10# forces decimal: a zero-padded timestamp would otherwise be read as
  # octal by arithmetic expansion.
  if [ $((now - 10#$last_sent)) -ge "$ALERT_REPEAT_SECONDS" ]; then
    return 0
  fi

  return 1
}

# Store the fingerprint and the current epoch time in ALERT_STATE_PATH.
# Arguments:
#   $1 - fingerprint of the alert that was just handled
# The state is written to a ".tmp" sibling first and then moved into place.
remember_alert() {
  local fingerprint="$1"
  local now
  local staging="$ALERT_STATE_PATH.tmp"

  now="$(date +%s)"

  mkdir -p "$(dirname "$ALERT_STATE_PATH")"
  printf '%s\t%s\n' "$fingerprint" "$now" > "$staging"
  mv "$staging" "$ALERT_STATE_PATH"
}

# Delete the stored alert state and any leftover ".tmp" staging file.
clear_alert_state() {
  rm -f -- "$ALERT_STATE_PATH" "$ALERT_STATE_PATH.tmp"
}

# Send an alert unless the same set of failing checks was already reported
# within the repeat interval.
# Arguments:
#   $1 - severity
#   $2 - ntfy topic
#   $3 - message body; " Checks: <summary>" is appended when a summary exists
# The fingerprint is remembered after send_ntfy has been called.
send_alert_once() {
  local severity="$1"
  local topic="$2"
  local body="$3"
  local fingerprint
  local summary

  fingerprint="$(alert_fingerprint)"
  summary="$(alert_summary)"

  if [ -n "$summary" ]; then
    body="$body Checks: $summary"
  fi

  if should_send_alert "$fingerprint"; then
    send_ntfy "$severity" "$topic" "$body"
    remember_alert "$fingerprint"
  fi
}

# Write the collected results as a JSON report, print it, and raise alerts.
# The report is written to "$OUTPUT_PATH.tmp" and then moved to OUTPUT_PATH.
# Overall status is critical if any result is critical, else warning if any
# result is a warning, else ok.
# Returns: 2 on critical and 1 on warning (after send_alert_once); on ok the
#          alert state is cleared.
write_json() {
  local timestamp
  local critical_count
  local warning_count
  local status
  local first=1
  # Loop variables of the read below; declared local so they do not leak.
  local severity
  local name
  local message

  timestamp="$(date -Iseconds)"
  critical_count="$(awk -F '\t' '$1 == "critical" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
  warning_count="$(awk -F '\t' '$1 == "warning" { count++ } END { print count + 0 }' "$RESULTS_FILE")"

  if [ "$critical_count" -gt 0 ]; then
    status="critical"
  elif [ "$warning_count" -gt 0 ]; then
    status="warning"
  else
    status="ok"
  fi

  mkdir -p "$(dirname "$OUTPUT_PATH")"
  {
    printf '{\n'
    printf ' "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
    printf ' "status": "%s",\n' "$status"
    printf ' "critical_count": %s,\n' "$critical_count"
    printf ' "warning_count": %s,\n' "$warning_count"
    printf ' "checks": [\n'
    while IFS=$'\t' read -r severity name message; do
      # Comma goes before every element except the first.
      if [ "$first" -eq 0 ]; then
        printf ',\n'
      fi
      first=0
      printf ' {"severity":"%s","name":"%s","message":"%s"}' \
        "$(printf '%s' "$severity" | json_escape)" \
        "$(printf '%s' "$name" | json_escape)" \
        "$(printf '%s' "$message" | json_escape)"
    done < "$RESULTS_FILE"
    printf '\n ]\n'
    printf '}\n'
  } > "$OUTPUT_PATH.tmp"
  mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"

  cat "$OUTPUT_PATH"

  if [ "$status" = "critical" ]; then
    send_alert_once "critical" "$CRITICAL_TOPIC" "Posture-check critical: $critical_count critical, $warning_count warning. See $OUTPUT_PATH"
    return 2
  fi

  if [ "$status" = "warning" ]; then
    send_alert_once "warning" "$WARNING_TOPIC" "Posture-check warning: $warning_count warning. See $OUTPUT_PATH"
    return 1
  fi

  clear_alert_state
}

# Run every check in order and write the report.
# The return status is that of write_json (0 ok, 1 warning, 2 critical).
main() {
  local share

  # A missing tool is recorded as a warning by need_cmd; the run continues.
  need_cmd findmnt || true
  need_cmd df || true
  need_cmd awk || true

  check_fstype "/mnt/cache" "xfs" "critical" "cache_fstype"
  check_disk1_fstype
  check_no_ntfs_on_core_mounts
  check_mover_drift
  check_inode_usage "/mnt/cache" 80 "cache_inode_usage"
  check_disk1_inode_usage
  check_filesystem_usage "/mnt/cache" 70 "cache_fill_level" "warning"

  for share in appdata system domains; do
    if [ -e "/mnt/user/$share" ]; then
      check_filesystem_usage "/mnt/user/$share" 70 "share_${share}_fill_level" "warning"
    else
      add_result "warning" "share_${share}_fill_level" "/mnt/user/$share missing"
    fi
  done

  check_nvme_smart
  check_authelia_config_drift
  write_json
}

# Entry point: the script's exit status is whatever main returns.
main "$@"