143 lines
3.2 KiB
Bash
Executable File
143 lines
3.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Control script for the Docker critical-events watcher.
#
# Usage: <script> start|stop|restart|status|smoke
#
# Every setting below may be overridden from the environment; the value after
# `:-` is the default used when the variable is unset or empty.

set -euo pipefail

# Working directory for the pidfile and log files (created by `start`).
BASE_DIR="${BASE_DIR:-/mnt/user/services/posture-check}"

# Script that `start` launches in the background.
WATCHER_SCRIPT="${WATCHER_SCRIPT:-/mnt/user/services/homelab-infra/services/posture-check/docker-critical-events.sh}"

# File recording the PID of the background watcher.
PID_FILE="${PID_FILE:-$BASE_DIR/docker-critical-events.pid}"

# File receiving the watcher's stdout and stderr.
OUT_FILE="${OUT_FILE:-$BASE_DIR/docker-critical-events.out}"

# Handed to the watcher as OUTPUT_PATH; `status` prints its tail.
EVENT_LOG="${EVENT_LOG:-$BASE_DIR/docker-critical-events-last.log}"

# Notification helper: run directly by `smoke`, and passed to the watcher.
NTFY_SCRIPT="${NTFY_SCRIPT:-/mnt/user/services/homelab-infra/ops/restore-tests/send-ntfy.sh}"

# Topic handed to the notification helper and to the watcher.
NTFY_TOPIC="${NTFY_TOPIC:-homelab-alerts}"
#######################################
# Print the command summary for this script.
# Arguments: none
# Outputs:   usage text on stderr; nothing on stdout
# Returns:   exit status of cat (0 on success)
#######################################
usage() {
  # Unquoted delimiter on purpose: $0 must expand to the invoked script name.
  cat >&2 <<EOF
Usage: $0 start|stop|restart|status|smoke

  start    Start Docker critical-events watcher in the background.
  stop     Stop the watcher by pidfile.
  restart  Stop and start the watcher.
  status   Print watcher status and recent log tail.
  smoke    Send one ntfy test message through the same alert path.
EOF
}
#######################################
# Report whether the PID recorded in the pidfile names a live process.
# Globals:   PID_FILE (read)
# Arguments: none
# Returns:   0 when the pidfile holds a positive integer PID that accepts
#            signal 0; non-zero when the pidfile is missing, empty,
#            unreadable, malformed, or the probe fails.
#######################################
is_running() {
  [[ -s "$PID_FILE" ]] || return 1

  local pid
  pid="$(cat -- "$PID_FILE" 2>/dev/null)" || return 1

  # Only a plain positive integer may reach kill: "0" addresses the caller's
  # own process group and a negative value addresses a group or every
  # process, so a corrupt pidfile would otherwise look like a live watcher.
  [[ "$pid" =~ ^[1-9][0-9]*$ ]] || return 1

  # Signal 0 performs the existence/permission check without delivering
  # anything to the process.
  kill -0 "$pid" >/dev/null 2>&1
}
#######################################
# Launch the watcher script in the background and record its PID.
# Globals:   BASE_DIR, WATCHER_SCRIPT, PID_FILE, OUT_FILE, EVENT_LOG,
#            NTFY_SCRIPT, NTFY_TOPIC (all read)
# Arguments: none
# Outputs:   status line on stdout; failures on stderr
# Returns:   0 if the watcher is already running or was started and is
#            still alive after one second; 1 if the watcher script is not
#            readable or the new process did not stay up.
#######################################
start_watcher() {
  mkdir -p -- "$BASE_DIR"

  if is_running; then
    echo "docker-critical-events watcher already running (pid $(cat "$PID_FILE"))"
    return 0
  fi

  if [[ ! -r "$WATCHER_SCRIPT" ]]; then
    echo "Watcher script not readable: $WATCHER_SCRIPT" >&2
    return 1
  fi

  # The three assignments apply to the watcher process only. nohup plus the
  # /dev/null stdin detach it from the invoking terminal; OUT_FILE is
  # truncated on every start.
  NTFY_SCRIPT="$NTFY_SCRIPT" \
  NTFY_TOPIC="$NTFY_TOPIC" \
  OUTPUT_PATH="$EVENT_LOG" \
    nohup bash "$WATCHER_SCRIPT" >"$OUT_FILE" 2>&1 </dev/null &
  local pid=$!

  echo "$pid" >"$PID_FILE"

  # Give the watcher a moment so an immediate crash is reported here.
  sleep 1

  if is_running; then
    echo "docker-critical-events watcher started (pid $pid)"
    return 0
  fi

  # Do not leave a pidfile pointing at a process that already died.
  rm -f -- "$PID_FILE"
  echo "docker-critical-events watcher failed to stay running; see $OUT_FILE" >&2
  return 1
}
#######################################
# Stop the watcher recorded in the pidfile: SIGTERM first, SIGKILL if the
# process is still present one second later.
# Globals:   PID_FILE (read; the file is removed once the watcher is gone)
# Arguments: none
# Outputs:   status lines on stdout; problems on stderr
# Returns:   0 if the watcher is stopped or was not running; 1 if the
#            process is still present after SIGKILL (pidfile is kept).
#######################################
stop_watcher() {
  if ! is_running; then
    rm -f -- "$PID_FILE"
    echo "docker-critical-events watcher is not running"
    return 0
  fi

  local pid
  pid="$(cat "$PID_FILE" 2>/dev/null)" || pid=""

  # Never hand kill anything but a plain positive integer: "0" would signal
  # this script's own process group and a negative value a whole group or
  # every process.
  if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]]; then
    echo "ignoring invalid pid '$pid' in $PID_FILE" >&2
    rm -f -- "$PID_FILE"
    echo "docker-critical-events watcher is not running"
    return 0
  fi

  # Best effort: the process may exit between the liveness probe and here.
  kill "$pid" >/dev/null 2>&1 || true
  sleep 1

  if kill -0 "$pid" >/dev/null 2>&1; then
    echo "watcher still running after SIGTERM; sending SIGKILL"
    kill -9 "$pid" >/dev/null 2>&1 || true
    sleep 1

    # Keep the pidfile when even SIGKILL did not remove the process, so a
    # later status/stop still knows about it.
    if kill -0 "$pid" >/dev/null 2>&1; then
      echo "docker-critical-events watcher (pid $pid) still running after SIGKILL" >&2
      return 1
    fi
  fi

  rm -f -- "$PID_FILE"
  echo "docker-critical-events watcher stopped"
}
#######################################
# Print watcher state, the configured paths, and the last 20 lines of the
# event log and of the watcher output file when those files are non-empty.
# Globals:   PID_FILE, WATCHER_SCRIPT, EVENT_LOG, OUT_FILE (read)
# Arguments: none
# Outputs:   key=value lines followed by optional log tails, on stdout
# Returns:   0 whether or not the watcher is running (non-zero only if
#            one of the commands it runs fails)
#######################################
status_watcher() {
  if is_running; then
    echo "status=running pid=$(cat "$PID_FILE")"
  else
    echo "status=stopped"
    # A pidfile without a live process is left over from an earlier run.
    if [[ -e "$PID_FILE" ]]; then
      echo "stale_pidfile=$PID_FILE"
    fi
  fi

  echo "watcher_script=$WATCHER_SCRIPT"
  echo "event_log=$EVENT_LOG"
  echo "out_file=$OUT_FILE"

  if [[ -s "$EVENT_LOG" ]]; then
    echo
    echo "Recent critical events:"
    tail -n 20 -- "$EVENT_LOG"
  fi

  if [[ -s "$OUT_FILE" ]]; then
    echo
    echo "Recent watcher output:"
    tail -n 20 -- "$OUT_FILE"
  fi
}
#######################################
# Send one test notification through the ntfy helper script.
# Globals:   NTFY_SCRIPT, NTFY_TOPIC (read)
# Arguments: none
# Outputs:   confirmation on stdout; failures on stderr
# Returns:   0 when the helper exits successfully; 1 when the helper is
#            not readable or exits non-zero.
#######################################
smoke_ntfy() {
  if [[ ! -r "$NTFY_SCRIPT" ]]; then
    echo "ntfy helper not readable: $NTFY_SCRIPT" >&2
    return 1
  fi

  # Helper arguments in order: topic, then what appear to be title, message
  # body and priority -- NOTE(review): confirm against the helper script.
  # The status is checked explicitly so that a failed send is never reported
  # as "sent", even when this function is called where errexit is ignored.
  if ! bash "$NTFY_SCRIPT" \
    "$NTFY_TOPIC" \
    "Docker critical watcher smoke" \
    "Smoke test from $(hostname) at $(date -Iseconds). No container was stopped." \
    default; then
    echo "smoke notification failed via $NTFY_SCRIPT" >&2
    return 1
  fi

  echo "smoke notification sent to $NTFY_TOPIC"
}
# Entry point: dispatch on the first command-line argument. `${1:-}` keeps
# `set -u` from aborting when the script is run without arguments.
case "${1:-}" in
  start)
    start_watcher
    ;;
  stop)
    stop_watcher
    ;;
  restart)
    # Under `set -e` a failing stop aborts here, so a second watcher is
    # never started next to one that could not be stopped.
    stop_watcher
    start_watcher
    ;;
  status)
    status_watcher
    ;;
  smoke)
    smoke_ntfy
    ;;
  help | -h | --help)
    # Explicit request for help is not an error.
    usage
    exit 0
    ;;
  *)
    # Missing or unknown command.
    usage
    exit 2
    ;;
esac