285 lines
8.2 KiB
Bash
Executable File
285 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Posture check: records the outcome of each check as one tab-separated line
# in a scratch results file, then renders those lines as a JSON report.
#
# Environment overrides (defaults are the values assigned below):
#   OUTPUT_PATH       where the JSON report is written
#   NTFY_BASE_URL     base URL of the ntfy server
#   WARNING_TOPIC     ntfy topic used when the overall status is "warning"
#   CRITICAL_TOPIC    ntfy topic used when the overall status is "critical"
#   SEND_NTFY         "1" enables ntfy notifications; anything else disables
#   TMP_DIR           directory that holds the scratch results file
#   ALLOW_DISK1_NTFS  "1" tolerates NTFS on /mnt/disk1
set -euo pipefail

OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/last.json}"
NTFY_BASE_URL="${NTFY_BASE_URL:-https://ntfy.kaleschke.info}"
WARNING_TOPIC="${WARNING_TOPIC:-homelab-alerts}"
CRITICAL_TOPIC="${CRITICAL_TOPIC:-homelab-alerts}"
SEND_NTFY="${SEND_NTFY:-1}"
TMP_DIR="${TMP_DIR:-/tmp/kallilab-posture-check}"
ALLOW_DISK1_NTFS="${ALLOW_DISK1_NTFS:-1}"

mkdir -p -- "$TMP_DIR"

# mktemp creates the (empty) file atomically under an unpredictable name, so
# a pre-planted file or symlink named after our PID cannot be clobbered.
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
|
|
|
|
# Remove the scratch results file. Installed as the EXIT handler right below,
# so it runs on every exit path of the script.
cleanup() {
  rm -f "${RESULTS_FILE}"
}

trap 'cleanup' EXIT
|
|
|
|
# Escape text read from stdin so it can be placed inside a JSON string.
#
# Handles backslash, double quote, tab, carriage return, backspace and form
# feed. Processing is line-oriented: newlines in the input are passed through
# unchanged, so callers should feed a single line at a time.
#
# Inputs:  stdin  - raw text
# Outputs: stdout - escaped text
json_escape() {
  # Backslashes must be doubled first so the backslashes introduced by the
  # later rules are not escaped a second time.
  # The control characters are injected literally via bash $'...' quoting
  # because "\t" and friends inside a sed pattern are a GNU extension.
  sed \
    -e 's/\\/\\\\/g' \
    -e 's/"/\\"/g' \
    -e $'s/\t/\\\\t/g' \
    -e $'s/\r/\\\\r/g' \
    -e $'s/\b/\\\\b/g' \
    -e $'s/\f/\\\\f/g'
}
|
|
|
|
# Append one check result to RESULTS_FILE as "severity<TAB>name<TAB>message".
#
# Globals:   RESULTS_FILE (appended to)
# Arguments: $1 - severity
#            $2 - check name
#            $3 - human-readable message
add_result() {
  local severity="$1"
  local name="$2"
  local message="$3"

  # Each record must stay on one line with exactly two separating tabs.
  # Flatten characters that would split the record (newline/CR anywhere, tab
  # in the first two fields) into spaces. Tabs in the message are kept: it is
  # the last field, so they cannot shift any other field.
  severity="${severity//[$'\t\n\r']/ }"
  name="${name//[$'\t\n\r']/ }"
  message="${message//[$'\n\r']/ }"

  printf '%s\t%s\t%s\n' "$severity" "$name" "$message" >>"$RESULTS_FILE"
}
|
|
|
|
# Verify that a command is available.
#
# Arguments: $1 - command name to look up
# Returns:   0 when the command is found; otherwise records a "warning"
#            result named "command_<name>" and returns 1.
need_cmd() {
  local tool="$1"

  command -v "$tool" >/dev/null 2>&1 && return 0

  add_result "warning" "command_$tool" "Command missing: $tool"
  return 1
}
|
|
|
|
# Compare the filesystem type reported by findmnt for a path against the
# expected type and record the outcome.
#
# Arguments: $1 - path handed to findmnt
#            $2 - expected filesystem type
#            $3 - severity recorded on a mismatch or when findmnt fails
#            $4 - check name used in the recorded result
check_fstype() {
  local mount_path="$1" want="$2" fail_severity="$3" check_name="$4"
  local found

  command -v findmnt >/dev/null 2>&1 || {
    add_result "warning" "$check_name" "Cannot check $mount_path filesystem because findmnt is missing"
    return
  }

  # A non-zero exit from findmnt is reported as "mount not found".
  if ! found="$(findmnt -no FSTYPE "$mount_path" 2>/dev/null)"; then
    add_result "$fail_severity" "$check_name" "Mount not found: $mount_path"
    return
  fi

  if [ "$found" != "$want" ]; then
    add_result "$fail_severity" "$check_name" "$mount_path filesystem is $found, expected $want"
    return
  fi

  add_result "ok" "$check_name" "$mount_path filesystem is $found"
}
|
|
|
|
# Record whether any mount at or below the core mount points uses an
# NTFS-like filesystem type (ntfs3 or fuseblk).
#
# Globals: ALLOW_DISK1_NTFS (read) - "1" limits the scan to /mnt/cache and
#          downgrades a clean result to a warning that mentions /mnt/disk1.
check_no_ntfs_on_core_mounts() {
  local hits
  local pattern="^/mnt/(cache|disk1)(/|$)"

  if ! command -v findmnt >/dev/null 2>&1; then
    add_result "warning" "no_ntfs_core_mounts" "Cannot check NTFS mounts because findmnt is missing"
    return
  fi

  if [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    pattern="^/mnt/cache(/|$)"
  fi

  # Guard the substitution: with "set -e" and "pipefail" (both enabled at the
  # top of the script) a failing findmnt in a bare assignment would abort the
  # whole run before any report is written. Record a warning instead.
  if ! hits="$(
    findmnt -rn -o TARGET,FSTYPE 2>/dev/null \
      | awk -v pattern="$pattern" '$1 ~ pattern && ($2 == "ntfs3" || $2 == "fuseblk") { print $1 ":" $2 }' \
      | paste -sd ',' -
  )"; then
    add_result "warning" "no_ntfs_core_mounts" "Cannot list mounts with findmnt"
    return
  fi

  if [ -n "$hits" ]; then
    add_result "critical" "no_ntfs_core_mounts" "NTFS-like filesystem on core mount: $hits"
  elif [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    add_result "warning" "no_ntfs_core_mounts" "No NTFS on /mnt/cache; /mnt/disk1 NTFS is temporarily allowed until Disk1 phase 2 migration"
  else
    add_result "ok" "no_ntfs_core_mounts" "No ntfs3/fuseblk mounts below /mnt/cache or /mnt/disk1"
  fi
}
|
|
|
|
# Record whether a directory that is expected to stay empty contains any
# entries. A missing directory counts as ok.
#
# Arguments: $1 - directory to inspect (optional, default /mnt/disk1/appdata)
#            $2 - check name for the recorded result (optional, default
#                 mover_drift_appdata)
check_mover_drift() {
  local path="${1:-/mnt/disk1/appdata}"
  local name="${2:-mover_drift_appdata}"

  if [ ! -d "$path" ]; then
    add_result "ok" "$name" "$path does not exist"
    return
  fi

  # -quit stops find after the first entry, so large trees are not walked.
  if find "$path" -mindepth 1 -print -quit | grep -q .; then
    add_result "critical" "$name" "$path contains entries; appdata should stay cache-only"
  else
    add_result "ok" "$name" "$path is empty"
  fi
}
|
|
|
|
# Record inode usage for a path, read from the fifth column of the second
# line of "df -Pi" output.
#
# Arguments: $1 - path to query
#            $2 - threshold in percent; usage at or above it is a warning
#            $3 - check name used in the recorded result
check_inode_usage() {
  local target="$1" limit="$2" check_name="$3"
  local pct

  if ! pct="$(df -Pi "$target" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')"; then
    add_result "warning" "$check_name" "Cannot read inode usage for $target"
    return
  fi

  # Anything that is not a plain run of digits (empty, "-", ...) cannot be
  # compared numerically and is reported as unavailable.
  case "$pct" in
    '' | *[!0-9]*)
      add_result "warning" "$check_name" "$target inode usage unavailable (${pct:-unknown})"
      return
      ;;
  esac

  if ! [ "$pct" -lt "$limit" ]; then
    add_result "warning" "$check_name" "$target inode usage ${pct}% >= ${limit}%"
    return
  fi

  add_result "ok" "$check_name" "$target inode usage ${pct}%"
}
|
|
|
|
# Record space usage for a path, read from the fifth column of the second
# line of "df -P" output.
#
# Arguments: $1 - path to query
#            $2 - threshold in percent; usage at or above it is flagged
#            $3 - check name used in the recorded result
#            $4 - severity recorded when the threshold is reached
check_filesystem_usage() {
  local path="$1"
  local max_percent="$2"
  local name="$3"
  local severity="$4"
  local use_percent

  if ! use_percent="$(df -P "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')"; then
    add_result "warning" "$name" "Cannot read filesystem usage for $path"
    return
  fi

  # Same guard as check_inode_usage: a value that is not purely numeric
  # (empty, "-", ...) would make the comparison below error out and be
  # reported as if the threshold had been exceeded.
  if ! [[ "$use_percent" =~ ^[0-9]+$ ]]; then
    add_result "warning" "$name" "$path usage unavailable (${use_percent:-unknown})"
    return
  fi

  if [ "$use_percent" -lt "$max_percent" ]; then
    add_result "ok" "$name" "$path usage ${use_percent}%"
  else
    add_result "$severity" "$name" "$path usage ${use_percent}% >= ${max_percent}%"
  fi
}
|
|
|
|
# Read "nvme smart-log" for one device and record three results:
# critical_warning, percentage_used and media_errors.
#
# Globals: NVME_DEVICE (read, optional) - device to query,
#          default /dev/nvme0n1
#
# A field that cannot be found in the smart-log output is never reported as
# ok: critical_warning and media_errors become "warning ... unknown",
# percentage_used becomes "critical ... unknown".
check_nvme_smart() {
  local device="${NVME_DEVICE:-/dev/nvme0n1}"
  local smart
  local warning
  local percentage_used
  local media_errors

  # need_cmd records its own warning when nvme is missing.
  if ! need_cmd nvme; then
    return
  fi

  if ! smart="$(nvme smart-log "$device" 2>/dev/null)"; then
    add_result "critical" "nvme_smart" "Cannot read nvme smart-log for $device"
    return
  fi

  # Here-strings instead of "printf | awk": awk exits on the first match, and
  # there is no writer process left that could fail the pipeline.
  warning="$(awk -F: '/critical_warning/ { gsub(/[[:space:]]/, "", $2); print $2; exit }' <<<"$smart")"
  percentage_used="$(awk -F: '/percentage_used/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<<"$smart")"
  media_errors="$(awk -F: '/media_errors/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<<"$smart")"

  if [ -z "$warning" ]; then
    add_result "warning" "nvme_critical_warning" "$device critical_warning unknown"
  elif [[ "$warning" =~ ^(0[xX])?0+$ ]]; then
    # Any spelling of zero (0, 0x0, 0x00, ...) means no warning bits are set.
    add_result "ok" "nvme_critical_warning" "$device critical_warning ${warning}"
  else
    add_result "critical" "nvme_critical_warning" "$device critical_warning ${warning}"
  fi

  if [ -n "${percentage_used:-}" ] && [ "$percentage_used" -lt 80 ]; then
    add_result "ok" "nvme_percentage_used" "$device percentage_used ${percentage_used}%"
  else
    add_result "critical" "nvme_percentage_used" "$device percentage_used ${percentage_used:-unknown}, expected <80"
  fi

  if [ -z "$media_errors" ]; then
    add_result "warning" "nvme_media_errors" "$device media_errors unknown"
  elif [ "$media_errors" = "0" ]; then
    add_result "ok" "nvme_media_errors" "$device media_errors 0"
  else
    add_result "warning" "nvme_media_errors" "$device media_errors ${media_errors}"
  fi
}
|
|
|
|
# Post a notification to an ntfy topic via curl. Best effort: nothing is sent
# when SEND_NTFY is not "1" or curl is unavailable, and a failed request is
# ignored, so the function always returns 0.
#
# Globals:   SEND_NTFY, NTFY_BASE_URL (read)
# Arguments: $1 - severity label placed in the notification title
#            $2 - topic appended to NTFY_BASE_URL
#            $3 - message body
send_ntfy() {
  local level="$1" channel="$2" text="$3"
  local -a request=(
    -fsS
    -H "Title: KalliLab posture-check $level"
    -H "Priority: high"
    --data-binary @-
    "$NTFY_BASE_URL/$channel"
  )

  if [ "$SEND_NTFY" = "1" ] && command -v curl >/dev/null 2>&1; then
    # The here-string feeds the body plus a trailing newline on stdin.
    curl "${request[@]}" <<<"$text" >/dev/null || true
  fi
}
|
|
|
|
# Render the collected results as a JSON report, print it, and notify.
#
# The report is written to "$OUTPUT_PATH.tmp" and then moved into place, so
# OUTPUT_PATH is only replaced once the new report is complete. The overall
# status is "critical" if any result is critical, else "warning" if any
# result is a warning, else "ok".
#
# Globals: RESULTS_FILE, OUTPUT_PATH, CRITICAL_TOPIC, WARNING_TOPIC (read)
# Outputs: the JSON report on stdout
# Returns: 2 when the status is critical, 1 when it is warning, 0 otherwise
write_json() {
  local timestamp
  local critical_count
  local warning_count
  local status
  # Loop variables are local so they do not leak into the caller's scope.
  local severity name message
  local first=1

  timestamp="$(date -Iseconds)"
  critical_count="$(awk -F '\t' '$1 == "critical" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
  warning_count="$(awk -F '\t' '$1 == "warning" { count++ } END { print count + 0 }' "$RESULTS_FILE")"

  if [ "$critical_count" -gt 0 ]; then
    status="critical"
  elif [ "$warning_count" -gt 0 ]; then
    status="warning"
  else
    status="ok"
  fi

  mkdir -p "$(dirname "$OUTPUT_PATH")"
  {
    printf '{\n'
    printf ' "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
    printf ' "status": "%s",\n' "$status"
    printf ' "critical_count": %s,\n' "$critical_count"
    printf ' "warning_count": %s,\n' "$warning_count"
    printf ' "checks": [\n'
    # One record per line: severity, name, message separated by tabs. The
    # message is the last field and therefore receives the rest of the line.
    while IFS=$'\t' read -r severity name message; do
      # Emit the separating comma before every element except the first.
      if [ "$first" -eq 0 ]; then
        printf ',\n'
      fi
      first=0
      printf ' {"severity":"%s","name":"%s","message":"%s"}' \
        "$(printf '%s' "$severity" | json_escape)" \
        "$(printf '%s' "$name" | json_escape)" \
        "$(printf '%s' "$message" | json_escape)"
    done <"$RESULTS_FILE"
    printf '\n ]\n'
    printf '}\n'
  } >"$OUTPUT_PATH.tmp"
  mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"

  cat "$OUTPUT_PATH"

  if [ "$status" = "critical" ]; then
    send_ntfy "critical" "$CRITICAL_TOPIC" "Posture-check critical: $critical_count critical, $warning_count warning. See $OUTPUT_PATH"
    return 2
  fi
  if [ "$status" = "warning" ]; then
    send_ntfy "warning" "$WARNING_TOPIC" "Posture-check warning: $warning_count warning. See $OUTPUT_PATH"
    return 1
  fi
}
|
|
|
|
# Run every posture check in order, then write the JSON report.
#
# Globals: ALLOW_DISK1_NTFS (read) - selects which filesystem type is
#          expected on /mnt/disk1 and how severe a mismatch is.
# Returns: the status of write_json, which is the last command executed.
main() {
  # Loop variable is local so it does not leak into the global scope.
  local share

  # A missing tool is recorded by need_cmd as a warning; it must not abort
  # the run, hence the "|| true".
  need_cmd findmnt || true
  need_cmd df || true
  need_cmd awk || true

  check_fstype "/mnt/cache" "xfs" "critical" "cache_fstype"
  if [ "$ALLOW_DISK1_NTFS" = "1" ]; then
    check_fstype "/mnt/disk1" "ntfs3" "warning" "disk1_fstype"
  else
    check_fstype "/mnt/disk1" "xfs" "critical" "disk1_fstype"
  fi
  check_no_ntfs_on_core_mounts
  check_mover_drift
  check_inode_usage "/mnt/cache" 80 "cache_inode_usage"
  check_inode_usage "/mnt/disk1" 80 "disk1_inode_usage"
  check_filesystem_usage "/mnt/cache" 70 "cache_fill_level" "warning"

  for share in appdata system domains; do
    if [ -e "/mnt/user/$share" ]; then
      check_filesystem_usage "/mnt/user/$share" 70 "share_${share}_fill_level" "warning"
    else
      add_result "warning" "share_${share}_fill_level" "/mnt/user/$share missing"
    fi
  done

  check_nvme_smart
  write_json
}
|
|
|
|
# Script entry point: hand all command-line arguments to main.
main "$@"
|