Harden backup and posture checks

This commit is contained in:
2026-05-16 13:04:22 +02:00
parent 12a87ad342
commit 878ad2d5f1
25 changed files with 716 additions and 54 deletions
+144
View File
@@ -0,0 +1,144 @@
#!/usr/bin/env bash
#
# Certificate / token check.
# Records one result per check in a scratch file; the functions below turn
# that file into a JSON report at OUTPUT_PATH and optionally notify via ntfy.
set -euo pipefail

# Settings. Every value can be overridden through the environment.
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/cert-token-last.json}"
NTFY_BASE_URL="${NTFY_BASE_URL:-https://ntfy.sh}"
WARNING_TOPIC="${WARNING_TOPIC:-kallilab-warning}"
CRITICAL_TOPIC="${CRITICAL_TOPIC:-kallilab-critical}"
SEND_NTFY="${SEND_NTFY:-1}"
CLOUDFLARE_TOKEN_FILE="${CLOUDFLARE_TOKEN_FILE:-/mnt/user/appdata/traefik/secrets/cloudflare_dns_api_token}"
# Thresholds in days: below CRITICAL_DAYS is critical, below WARN_DAYS is a warning.
WARN_DAYS="${WARN_DAYS:-14}"
CRITICAL_DAYS="${CRITICAL_DAYS:-7}"
# Whitespace-separated list of host names to probe on port 443.
DOMAINS="${DOMAINS:-traefik.kaleschke.info auth.kaleschke.info vault.kaleschke.info git.kaleschke.info cloud.kaleschke.info home.kaleschke.info borg.kaleschke.info grafana.kaleschke.info}"
TMP_DIR="${TMP_DIR:-/tmp/kallilab-cert-token-check}"
mkdir -p "$TMP_DIR"

# Scratch file holding tab-separated "severity<TAB>name<TAB>message" rows.
# mktemp creates it atomically under an unpredictable name, so a pre-planted
# symlink at a guessable path (the former results.<pid>) cannot be truncated.
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
trap 'rm -f "$RESULTS_FILE"' EXIT
# Escape stdin so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, tab and carriage return. Input is
# processed line by line, so callers are expected to pass single-line text.
json_escape() {
  # Backslashes go first so the backslashes added by later rules are not
  # doubled. Tab and CR are passed as literal bytes via $'...' because the
  # \t / \r regex escapes are a GNU sed extension.
  sed \
    -e 's/\\/\\\\/g' \
    -e 's/"/\\"/g' \
    -e $'s/\t/\\\\t/g' \
    -e $'s/\r/\\\\r/g'
}
# Append one check outcome to RESULTS_FILE as a tab-separated row.
# Arguments: $1 severity, $2 check name, $3 human-readable message.
add_result() {
  local severity="$1"
  local check_name="$2"
  local detail="$3"

  printf '%s\t%s\t%s\n' "$severity" "$check_name" "$detail" >> "$RESULTS_FILE"
}
# Probe the TLS certificate served for a domain on port 443 and record how
# many days remain until it expires.
# Globals:   CRITICAL_DAYS, WARN_DAYS (read)
# Arguments: $1 - host name (also used as SNI server name)
# Outputs:   exactly one add_result row named "cert_<domain>"
# Returns:   0; failures are recorded as critical results instead of
#            aborting the script under set -e.
check_cert() {
  local domain="$1"
  local enddate
  local end_epoch
  local now_epoch
  local days_left
  local severity

  # Empty stdin makes s_client quit right after the handshake. The explicit
  # emptiness test catches a failed fetch even when pipefail is not active;
  # otherwise `date -d ""` would silently be read as "today".
  if ! enddate="$(printf '' \
    | openssl s_client -servername "$domain" -connect "$domain:443" 2>/dev/null \
    | openssl x509 -noout -enddate 2>/dev/null \
    | cut -d= -f2-)" || [ -z "$enddate" ]; then
    add_result "critical" "cert_$domain" "Cannot read certificate for $domain"
    return
  fi

  # An unparsable date must not kill the whole run before the report is written.
  if ! end_epoch="$(date -d "$enddate" +%s 2>/dev/null)"; then
    add_result "critical" "cert_$domain" "Cannot parse certificate end date for $domain: $enddate"
    return
  fi

  now_epoch="$(date +%s)"
  days_left="$(( (end_epoch - now_epoch) / 86400 ))"

  if [ "$days_left" -lt "$CRITICAL_DAYS" ]; then
    severity="critical"
  elif [ "$days_left" -lt "$WARN_DAYS" ]; then
    severity="warning"
  else
    severity="ok"
  fi
  add_result "$severity" "cert_$domain" "$domain certificate expires in ${days_left}d"
}
# Verify the Cloudflare API token stored in CLOUDFLARE_TOKEN_FILE against the
# Cloudflare token-verify endpoint.
# Globals:   CLOUDFLARE_TOKEN_FILE (read), CURL_TIMEOUT (read, optional,
#            seconds, default 30)
# Outputs:   exactly one add_result row named "cloudflare_token"
# Returns:   0; every failure mode is recorded as a critical result.
check_cloudflare_token() {
  local token
  local response

  if [ ! -s "$CLOUDFLARE_TOKEN_FILE" ]; then
    add_result "critical" "cloudflare_token" "Token file missing or empty: $CLOUDFLARE_TOKEN_FILE"
    return
  fi

  # Strip all whitespace (including a stray CR from CRLF-edited files) and
  # treat an unreadable file as a finding rather than aborting under set -e.
  if ! token="$(tr -d '[:space:]' < "$CLOUDFLARE_TOKEN_FILE")" || [ -z "$token" ]; then
    add_result "critical" "cloudflare_token" "Token file unreadable or blank: $CLOUDFLARE_TOKEN_FILE"
    return
  fi

  # The Authorization header is fed to curl as config on stdin so the secret
  # never shows up in the process argument list. --max-time bounds a hung
  # request. NOTE: a token containing '"' or '\' would need escaping here.
  if ! response="$(printf 'header = "Authorization: Bearer %s"\n' "$token" \
    | curl -fsS --max-time "${CURL_TIMEOUT:-30}" --config - \
      https://api.cloudflare.com/client/v4/user/tokens/verify 2>/dev/null)"; then
    add_result "critical" "cloudflare_token" "Cloudflare token verify request failed"
    return
  fi

  if grep -q '"success"[[:space:]]*:[[:space:]]*true' <<< "$response"; then
    add_result "ok" "cloudflare_token" "Cloudflare token verify succeeded"
  else
    add_result "critical" "cloudflare_token" "Cloudflare token verify returned non-success"
  fi
}
# Best-effort ntfy notification; never fails the caller.
# Globals:   SEND_NTFY (must be "1" to send), NTFY_BASE_URL (read)
# Arguments: $1 severity label for the title, $2 topic, $3 message body
# Returns:   0 always (skipped, sent, or delivery error).
send_ntfy() {
  local severity="$1" topic="$2" body="$3"

  # Notifications are opt-out and silently skipped when curl is unavailable.
  [ "$SEND_NTFY" = "1" ] || return 0
  command -v curl >/dev/null 2>&1 || return 0

  # The here-string supplies the body plus a trailing newline on stdin.
  curl -fsS \
    -H "Title: KalliLab cert-token-check $severity" \
    -H "Priority: high" \
    --data-binary @- \
    "$NTFY_BASE_URL/$topic" <<< "$body" >/dev/null || true
}
# Render RESULTS_FILE as a JSON report, publish it atomically at OUTPUT_PATH,
# echo it to stdout and notify according to the overall status.
# Globals:   RESULTS_FILE, OUTPUT_PATH, CRITICAL_TOPIC, WARNING_TOPIC (read)
# Returns:   2 if any critical result exists, 1 if only warnings, else 0.
write_json() {
  local timestamp critical_count warning_count
  local status="ok"
  local separator=""

  timestamp="$(date -Iseconds)"
  critical_count="$(awk -F '\t' '$1 == "critical" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
  warning_count="$(awk -F '\t' '$1 == "warning" { count++ } END { print count + 0 }' "$RESULTS_FILE")"

  # Critical outranks warning, so it is applied last.
  if [ "$warning_count" -gt 0 ]; then status="warning"; fi
  if [ "$critical_count" -gt 0 ]; then status="critical"; fi

  mkdir -p "$(dirname "$OUTPUT_PATH")"
  {
    printf '{\n "timestamp": "%s",\n "status": "%s",\n "critical_count": %s,\n "warning_count": %s,\n "checks": [\n' \
      "$(printf '%s' "$timestamp" | json_escape)" \
      "$status" \
      "$critical_count" \
      "$warning_count"
    # The separator is empty for the first row and ",\n" for every later one.
    while IFS=$'\t' read -r severity name message; do
      printf '%s {"severity":"%s","name":"%s","message":"%s"}' \
        "$separator" \
        "$(printf '%s' "$severity" | json_escape)" \
        "$(printf '%s' "$name" | json_escape)" \
        "$(printf '%s' "$message" | json_escape)"
      separator=$',\n'
    done < "$RESULTS_FILE"
    printf '\n ]\n}\n'
  } > "$OUTPUT_PATH.tmp"
  # Write-then-rename so readers never observe a half-written report.
  mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
  cat "$OUTPUT_PATH"

  case "$status" in
    critical)
      send_ntfy critical "$CRITICAL_TOPIC" "Certificate/token check critical: $critical_count critical, $warning_count warning. See $OUTPUT_PATH"
      return 2
      ;;
    warning)
      send_ntfy warning "$WARNING_TOPIC" "Certificate/token check warning: $warning_count warning. See $OUTPUT_PATH"
      return 1
      ;;
  esac
}
# Driver: probe every configured domain, verify the Cloudflare token, then
# write the report. $DOMAINS is intentionally unquoted so the
# whitespace-separated list is split into one word per domain.
for domain in $DOMAINS; do
check_cert "$domain"
done
check_cloudflare_token
# write_json is the last command, so its return status (0, 1 or 2) becomes
# the exit status of the script.
write_json
+96
View File
@@ -0,0 +1,96 @@
#!/usr/bin/env bash
#
# Compose/runtime drift check.
# Records one result per container in a scratch file; the code below turns
# that file into a JSON report at OUTPUT_PATH.
set -euo pipefail

# Settings. Every value can be overridden through the environment.
# REPO_ROOT defaults to the directory two levels above this script.
REPO_ROOT="${REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/compose-runtime-drift-last.json}"
NTFY_SCRIPT="${NTFY_SCRIPT:-$REPO_ROOT/ops/restore-tests/send-ntfy.sh}"
NTFY_TOPIC="${NTFY_TOPIC:-kallilab-warning}"
SEND_NTFY="${SEND_NTFY:-1}"
TMP_DIR="${TMP_DIR:-/tmp/kallilab-compose-runtime-drift}"
mkdir -p "$TMP_DIR"

# Scratch file holding tab-separated "severity<TAB>name<TAB>message" rows.
# mktemp creates it atomically under an unpredictable name, so a pre-planted
# symlink at a guessable path (the former results.<pid>) cannot be truncated.
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
trap 'rm -f "$RESULTS_FILE"' EXIT
# Escape stdin so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, tab and carriage return (the latter shows
# up when values come from CRLF-terminated files). Input is processed line by
# line, so callers are expected to pass single-line text.
json_escape() {
  # Backslashes go first so the backslashes added by later rules are not
  # doubled. Tab and CR are passed as literal bytes via $'...' because the
  # \t / \r regex escapes are a GNU sed extension.
  sed \
    -e 's/\\/\\\\/g' \
    -e 's/"/\\"/g' \
    -e $'s/\t/\\\\t/g' \
    -e $'s/\r/\\\\r/g'
}
# Append one check outcome to RESULTS_FILE as a tab-separated row.
# Arguments: $1 severity, $2 check name, $3 human-readable message.
add_result() {
  local -r row_format='%s\t%s\t%s\n'
  local severity="$1" name="$2" message="$3"

  printf "$row_format" "$severity" "$name" "$message" >> "$RESULTS_FILE"
}
# Extract "<container><TAB><image>" pairs from a Compose file.
# The container column is the service's container_name when present, otherwise
# the service name. Services without an image: key produce no row.
# Arguments: $1 - path to the Compose YAML file
# Outputs:   one tab-separated row per service on stdout
#
# This is a line-oriented scan, not a YAML parser: it understands block-style
# mappings under a top-level "services:" key with any consistent indentation.
# A row is emitted only once the whole service has been read, so image: and
# container_name: may appear in either order.
parse_compose() {
  local compose="$1"
  awk '
    function unquote(value) {
      gsub(/["'\'']/, "", value)
      return value
    }
    # Print the pending service (if it has an image) and reset the state.
    function emit() {
      if (service != "" && image != "") {
        print container "\t" image
      }
      service = ""
      image = ""
      container = ""
    }
    # Blank lines and full-line comments never change the parse state.
    /^[ \t]*(#.*)?$/ { next }
    # An unindented line is a top-level key; only "services:" opens the section.
    /^[^ \t]/ {
      emit()
      in_services = ($0 ~ /^services:[ \t]*(#.*)?$/)
      service_indent = -1
      next
    }
    !in_services { next }
    {
      match($0, /^[ \t]+/)
      indent = RLENGTH
      # The first indented line after "services:" fixes the service indent.
      if (service_indent < 0) service_indent = indent
      if (indent < service_indent) next
      if (indent == service_indent) {
        emit()
        if ($1 ~ /^[A-Za-z0-9_.-]+:$/) {
          service = $1
          sub(/:$/, "", service)
          container = service
          key_indent = -1
        }
        next
      }
      if (service == "") next
      # Only direct keys of the service count; deeper nesting is ignored.
      if (key_indent < 0) key_indent = indent
      if (indent != key_indent) next
      if ($1 == "image:") {
        image = unquote($2)
      } else if ($1 == "container_name:") {
        container = unquote($2)
      }
    }
    END { emit() }
  ' "$compose"
}
# Compare every service found in the repository's Compose files with the
# image of the container that is actually present on the Docker host.
# Outer loop: NUL-delimited Compose file paths from find (the .git directory
# is pruned). Inner loop: "<container><TAB><image>" rows from parse_compose.
while IFS= read -r -d '' compose; do
while IFS="$(printf '\t')" read -r container expected_image; do
[ -n "$container" ] || continue
# Any docker inspect failure is reported as a missing container.
if ! runtime_image="$(docker inspect --format '{{.Config.Image}}' "$container" 2>/dev/null)"; then
add_result "warning" "$container" "Container missing for compose image $expected_image ($compose)"
continue
fi
# Plain string comparison of the two image references.
if [ "$runtime_image" = "$expected_image" ]; then
add_result "ok" "$container" "Runtime image matches $expected_image"
else
add_result "warning" "$container" "Runtime image '$runtime_image' differs from compose '$expected_image' ($compose)"
fi
done < <(parse_compose "$compose")
done < <(find "$REPO_ROOT" -path "$REPO_ROOT/.git" -prune -o -type f \( -name docker-compose.yml -o -name docker-compose.yaml -o -name compose.yml -o -name compose.yaml \) -print0)
# Summarise: this script only distinguishes "ok" from "warning".
timestamp="$(date -Iseconds)"
warning_count="$(awk -F '\t' '$1 == "warning" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
status="ok"
[ "$warning_count" -gt 0 ] && status="warning"
mkdir -p "$(dirname "$OUTPUT_PATH")"
# Build the report in a .tmp sibling, then rename it into place.
{
printf '{\n'
printf ' "timestamp": "%s",\n' "$(printf '%s' "$timestamp" | json_escape)"
printf ' "status": "%s",\n' "$status"
printf ' "warning_count": %s,\n' "$warning_count"
printf ' "checks": [\n'
# "first" suppresses the separating comma before the first array element.
first=1
while IFS="$(printf '\t')" read -r severity name message; do
if [ "$first" -eq 0 ]; then printf ',\n'; fi
first=0
printf ' {"severity":"%s","name":"%s","message":"%s"}' \
"$(printf '%s' "$severity" | json_escape)" \
"$(printf '%s' "$name" | json_escape)" \
"$(printf '%s' "$message" | json_escape)"
done < "$RESULTS_FILE"
printf '\n ]\n}\n'
} > "$OUTPUT_PATH.tmp"
mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
# Echo the finished report to stdout.
cat "$OUTPUT_PATH"
# Any warning makes the script exit 1. The notification is sent only when
# SEND_NTFY is "1" and NTFY_SCRIPT is executable; its failure is ignored.
if [ "$warning_count" -gt 0 ]; then
if [ "$SEND_NTFY" = "1" ] && [ -x "$NTFY_SCRIPT" ]; then
"$NTFY_SCRIPT" "$NTFY_TOPIC" "Compose/runtime drift detected" "$warning_count drift warning(s). See $OUTPUT_PATH" high || true
fi
exit 1
fi
+268
View File
@@ -0,0 +1,268 @@
#!/usr/bin/env bash
#
# Host posture check.
# Records one result per check in a scratch file; the functions below turn
# that file into a JSON report at OUTPUT_PATH and optionally notify via ntfy.
set -euo pipefail

# Settings. Every value can be overridden through the environment.
OUTPUT_PATH="${OUTPUT_PATH:-/mnt/user/services/posture-check/last.json}"
NTFY_BASE_URL="${NTFY_BASE_URL:-https://ntfy.sh}"
WARNING_TOPIC="${WARNING_TOPIC:-kallilab-warning}"
CRITICAL_TOPIC="${CRITICAL_TOPIC:-kallilab-critical}"
SEND_NTFY="${SEND_NTFY:-1}"
TMP_DIR="${TMP_DIR:-/tmp/kallilab-posture-check}"
mkdir -p "$TMP_DIR"

# Scratch file holding tab-separated "severity<TAB>name<TAB>message" rows.
# mktemp creates it atomically under an unpredictable name, so a pre-planted
# symlink at a guessable path (the former results.<pid>) cannot be truncated.
RESULTS_FILE="$(mktemp "$TMP_DIR/results.XXXXXX")"
# Remove the scratch results file; registered to run on every exit path.
cleanup() { rm -f "$RESULTS_FILE"; }
trap cleanup EXIT
# Escape stdin so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, tab and carriage return. Input is
# processed line by line, so callers are expected to pass single-line text.
json_escape() {
  # Backslashes go first so the backslashes added by later rules are not
  # doubled. Tab and CR are passed as literal bytes via $'...' because the
  # \t / \r regex escapes are a GNU sed extension.
  sed \
    -e 's/\\/\\\\/g' \
    -e 's/"/\\"/g' \
    -e $'s/\t/\\\\t/g' \
    -e $'s/\r/\\\\r/g'
}
# Append one check outcome to RESULTS_FILE as a tab-separated row.
# Arguments: $1 severity, $2 check name, $3 human-readable message.
add_result() {
  printf '%s\t%s\t%s\n' "$1" "$2" "$3" >> "$RESULTS_FILE"
}
# Check that a command is available; record a warning when it is not.
# Arguments: $1 - command name
# Returns:   0 if the command resolves, 1 (after adding a warning) otherwise.
need_cmd() {
  local tool="$1"

  command -v "$tool" >/dev/null 2>&1 && return 0
  add_result "warning" "command_$tool" "Command missing: $tool"
  return 1
}
# Compare the filesystem type reported by findmnt for a path with the
# expected type.
# Arguments: $1 path, $2 expected fstype, $3 severity to record on mismatch
#            or missing mount, $4 check name
# Outputs:   exactly one add_result row
check_fstype() {
  local path="$1" expected="$2" severity="$3" name="$4"
  local actual

  # Without findmnt the check cannot run; that is only a warning.
  command -v findmnt >/dev/null 2>&1 || {
    add_result "warning" "$name" "Cannot check $path filesystem because findmnt is missing"
    return
  }

  actual="$(findmnt -no FSTYPE "$path" 2>/dev/null)" || {
    add_result "$severity" "$name" "Mount not found: $path"
    return
  }

  if [ "$actual" != "$expected" ]; then
    add_result "$severity" "$name" "$path filesystem is $actual, expected $expected"
    return
  fi
  add_result "ok" "$name" "$path filesystem is $actual"
}
# Flag ntfs3/fuseblk mounts located at or below /mnt/cache or /mnt/disk1.
# Outputs: exactly one add_result row named "no_ntfs_core_mounts"; offending
#          mounts are listed as comma-separated "target:fstype" pairs.
check_no_ntfs_on_core_mounts() {
  local hits

  command -v findmnt >/dev/null 2>&1 || {
    add_result "warning" "no_ntfs_core_mounts" "Cannot check NTFS mounts because findmnt is missing"
    return
  }

  # awk both filters the mount table and joins the matches with commas.
  hits="$(findmnt -rn -o TARGET,FSTYPE 2>/dev/null | awk '
    $1 ~ "^/mnt/(cache|disk1)(/|$)" && ($2 == "ntfs3" || $2 == "fuseblk") {
      joined = joined sep $1 ":" $2
      sep = ","
    }
    END { if (joined != "") print joined }
  ')"

  if [ -z "$hits" ]; then
    add_result "ok" "no_ntfs_core_mounts" "No ntfs3/fuseblk mounts below /mnt/cache or /mnt/disk1"
  else
    add_result "critical" "no_ntfs_core_mounts" "NTFS-like filesystem on core mount: $hits"
  fi
}
# Detect appdata content on disk1: /mnt/disk1/appdata must be absent or empty.
# Outputs: exactly one add_result row named "mover_drift_appdata".
check_mover_drift() {
  local path="/mnt/disk1/appdata"
  local verdict="ok"
  local detail="$path does not exist"

  if [ -d "$path" ]; then
    # find stops at the first entry; grep only tests whether anything printed.
    if find "$path" -mindepth 1 -print -quit | grep -q .; then
      verdict="critical"
      detail="$path contains entries; appdata should stay cache-only"
    else
      detail="$path is empty"
    fi
  fi

  add_result "$verdict" "mover_drift_appdata" "$detail"
}
# Record inode usage of the filesystem containing a path.
# Arguments: $1 path, $2 threshold in percent (usage must stay below it),
#            $3 check name
# Outputs:   exactly one add_result row; "warning" when usage reaches the
#            threshold or cannot be determined.
check_inode_usage() {
  local path="$1"
  local max_percent="$2"
  local name="$3"
  local use_percent

  # A failing df must neither abort the script nor leave a stale value.
  use_percent="$(df -Pi "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')" || use_percent=""

  # df prints "-" when a filesystem reports no inode counts; that (or empty
  # output) is "unknown", not "over the limit".
  case "$use_percent" in
    '' | *[!0-9]*)
      add_result "warning" "$name" "Cannot read inode usage for $path"
      return
      ;;
  esac

  if [ "$use_percent" -lt "$max_percent" ]; then
    add_result "ok" "$name" "$path inode usage ${use_percent}%"
  else
    add_result "warning" "$name" "$path inode usage ${use_percent}% >= ${max_percent}%"
  fi
}
# Record block usage of the filesystem containing a path.
# Arguments: $1 path, $2 threshold in percent (usage must stay below it),
#            $3 check name, $4 severity to record when the threshold is reached
# Outputs:   exactly one add_result row; an unreadable usage value is always
#            recorded as "warning".
check_filesystem_usage() {
  local path="$1"
  local max_percent="$2"
  local name="$3"
  local severity="$4"
  local use_percent

  # A failing df must neither abort the script nor leave a stale value.
  use_percent="$(df -P "$path" 2>/dev/null | awk 'NR==2 { gsub("%", "", $5); print $5 }')" || use_percent=""

  # Empty or non-numeric capacity means "unknown", not "over the limit".
  case "$use_percent" in
    '' | *[!0-9]*)
      add_result "warning" "$name" "Cannot read filesystem usage for $path"
      return
      ;;
  esac

  if [ "$use_percent" -lt "$max_percent" ]; then
    add_result "ok" "$name" "$path usage ${use_percent}%"
  else
    add_result "$severity" "$name" "$path usage ${use_percent}% >= ${max_percent}%"
  fi
}
# Evaluate three fields of `nvme smart-log`: critical_warning,
# percentage_used and media_errors.
# Globals:   NVME_DEVICE (read, optional; default /dev/nvme0n1)
# Outputs:   one add_result row per field, or a single "nvme_smart" critical
#            row when the log cannot be read. Nothing beyond need_cmd's own
#            warning when the nvme command is missing.
check_nvme_smart() {
  local device="${NVME_DEVICE:-/dev/nvme0n1}"
  local smart warning percentage_used media_errors

  need_cmd nvme || return 0

  smart="$(nvme smart-log "$device" 2>/dev/null)" || {
    add_result "critical" "nvme_smart" "Cannot read nvme smart-log for $device"
    return
  }

  # Each field is the text after the first ':' on the first matching line.
  warning="$(awk -F: '/critical_warning/ { gsub(/[[:space:]]/, "", $2); print $2; exit }' <<< "$smart")"
  percentage_used="$(awk -F: '/percentage_used/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<< "$smart")"
  media_errors="$(awk -F: '/media_errors/ { gsub(/[^0-9]/, "", $2); print $2; exit }' <<< "$smart")"

  # A missing critical_warning field counts as 0.
  case "${warning:-0}" in
    0 | 0x00)
      add_result "ok" "nvme_critical_warning" "$device critical_warning ${warning:-0}"
      ;;
    *)
      add_result "critical" "nvme_critical_warning" "$device critical_warning ${warning}"
      ;;
  esac

  # A missing percentage_used field is treated as critical ("unknown").
  if [ -z "${percentage_used:-}" ] || [ "$percentage_used" -ge 80 ]; then
    add_result "critical" "nvme_percentage_used" "$device percentage_used ${percentage_used:-unknown}, expected <80"
  else
    add_result "ok" "nvme_percentage_used" "$device percentage_used ${percentage_used}%"
  fi

  # A missing media_errors field counts as 0.
  case "${media_errors:-0}" in
    0)
      add_result "ok" "nvme_media_errors" "$device media_errors 0"
      ;;
    *)
      add_result "warning" "nvme_media_errors" "$device media_errors ${media_errors}"
      ;;
  esac
}
# Best-effort ntfy notification; never fails the caller.
# Globals:   SEND_NTFY (must be "1" to send), NTFY_BASE_URL (read)
# Arguments: $1 severity label for the title, $2 topic, $3 message body
# Returns:   0 always (skipped, sent, or delivery error).
send_ntfy() {
  local severity="$1" topic="$2" body="$3"

  # Notifications are opt-out and silently skipped when curl is unavailable.
  [ "$SEND_NTFY" = "1" ] || return 0
  command -v curl >/dev/null 2>&1 || return 0

  printf '%s\n' "$body" \
    | curl -fsS \
      -H "Title: KalliLab posture-check $severity" \
      -H "Priority: high" \
      --data-binary @- \
      "$NTFY_BASE_URL/$topic" >/dev/null \
    || true
}
# Render RESULTS_FILE as a JSON report, publish it atomically at OUTPUT_PATH,
# echo it to stdout and notify according to the overall status.
# Globals:   RESULTS_FILE, OUTPUT_PATH, CRITICAL_TOPIC, WARNING_TOPIC (read)
# Returns:   2 if any critical result exists, 1 if only warnings, else 0.
write_json() {
  local timestamp critical_count warning_count
  local status="ok"
  local separator=""

  timestamp="$(date -Iseconds)"
  critical_count="$(awk -F '\t' '$1 == "critical" { count++ } END { print count + 0 }' "$RESULTS_FILE")"
  warning_count="$(awk -F '\t' '$1 == "warning" { count++ } END { print count + 0 }' "$RESULTS_FILE")"

  # Critical outranks warning, so it is applied last.
  if [ "$warning_count" -gt 0 ]; then status="warning"; fi
  if [ "$critical_count" -gt 0 ]; then status="critical"; fi

  mkdir -p "$(dirname "$OUTPUT_PATH")"
  {
    printf '{\n "timestamp": "%s",\n "status": "%s",\n "critical_count": %s,\n "warning_count": %s,\n "checks": [\n' \
      "$(printf '%s' "$timestamp" | json_escape)" \
      "$status" \
      "$critical_count" \
      "$warning_count"
    # The separator is empty for the first row and ",\n" for every later one.
    while IFS=$'\t' read -r severity name message; do
      printf '%s {"severity":"%s","name":"%s","message":"%s"}' \
        "$separator" \
        "$(printf '%s' "$severity" | json_escape)" \
        "$(printf '%s' "$name" | json_escape)" \
        "$(printf '%s' "$message" | json_escape)"
      separator=$',\n'
    done < "$RESULTS_FILE"
    printf '\n ]\n}\n'
  } > "$OUTPUT_PATH.tmp"
  # Write-then-rename so readers never observe a half-written report.
  mv "$OUTPUT_PATH.tmp" "$OUTPUT_PATH"
  cat "$OUTPUT_PATH"

  case "$status" in
    critical)
      send_ntfy "critical" "$CRITICAL_TOPIC" "Posture-check critical: $critical_count critical, $warning_count warning. See $OUTPUT_PATH"
      return 2
      ;;
    warning)
      send_ntfy "warning" "$WARNING_TOPIC" "Posture-check warning: $warning_count warning. See $OUTPUT_PATH"
      return 1
      ;;
  esac
}
# Run every posture check in a fixed order, then write the report.
# Returns: the status of write_json (0 ok, 1 warning, 2 critical).
main() {
  local tool mount

  # Missing tools are recorded as warnings by need_cmd; keep going regardless.
  for tool in findmnt df awk; do
    need_cmd "$tool" || true
  done

  for mount in cache disk1; do
    check_fstype "/mnt/$mount" "xfs" "critical" "${mount}_fstype"
  done
  check_no_ntfs_on_core_mounts
  check_mover_drift
  for mount in cache disk1; do
    check_inode_usage "/mnt/$mount" 80 "${mount}_inode_usage"
  done

  check_filesystem_usage "/mnt/cache" 70 "cache_fill_level" "warning"
  for share in appdata system domains; do
    if [ ! -e "/mnt/user/$share" ]; then
      add_result "warning" "share_${share}_fill_level" "/mnt/user/$share missing"
      continue
    fi
    check_filesystem_usage "/mnt/user/$share" 70 "share_${share}_fill_level" "warning"
  done

  check_nvme_smart
  write_json
}
# Entry point: run all checks, forwarding the command-line arguments to main.
# As the last command, main's return status becomes the script's exit status.
main "$@"
+5
View File
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Thin wrapper that runs posture-check.sh from the directory this file is in.
set -euo pipefail
# Absolute path of the directory containing this wrapper, resolved via cd/pwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# exec replaces this shell with posture-check.sh, passing all arguments
# through, so the exit status is the one produced by that script.
exec "$SCRIPT_DIR/posture-check.sh" "$@"