From a137129c751f2893b3098ba8c6dbf2de66c73b95 Mon Sep 17 00:00:00 2001 From: Micha Date: Tue, 23 Jun 2026 20:56:13 +0200 Subject: [PATCH] cert-token-check: Healthchecks heartbeat; document internal ping URLs Add the same endpoint-agnostic Healthchecks ping wrapper to cert-token-check.sh (daily) as in posture-check.sh; capability URL from host secret file healthchecks_cert_token_url. SECRETS_MAP: document the per-job internal ping URL files. MASTER_TODO: posture-check + cert-token-check wired and verified (status up); project KalliLab CORE + ntfy integration created. Co-Authored-By: Claude Opus 4.8 --- docs/MASTER_TODO.md | 2 +- docs/SECRETS_MAP.md | 1 + services/posture-check/cert-token-check.sh | 28 ++++++++++++++++++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/docs/MASTER_TODO.md b/docs/MASTER_TODO.md index 7de4ad6..e439820 100644 --- a/docs/MASTER_TODO.md +++ b/docs/MASTER_TODO.md @@ -27,7 +27,7 @@ Host-Reports (`/mnt/user/backups/restore-reports/`) und in der Git-Historie. | Home Assistant Tibber | Operator/Codex | Tibber per HA-UI-Config-Flow verbinden. Danach Energy-Dashboard um echte Kosten/Preisquelle ergaenzen; SolarEdge-PV, Netz und Speicher sind bereits konfiguriert und validiert | `docs/runbooks/smart-home-bootstrap.md`, `docs/DECISIONS.md` | | Nearline-Pull Dead-Man's-Switch | Operator | **S4U-Root-Cause 2026-06-21 behoben + verifiziert:** Task `KalliLab H Drive Nearline Pull` von S4U auf LogonType `Interactive` ("Nur wenn Benutzer angemeldet") umgestellt (kein Passwort noetig, da `michi` Dauer-Konsolen-User) -> per Planer mit `0x0` bestaetigt. Spiegel frisch, Exit-Code-Leak gefixt, Heartbeat-Pings gepusht. 
**Verbleibt (optional, niedrige Dringlichkeit):** je einen Healthchecks-Check anlegen + Capability-URL hinterlegen (baerchen ENV `HEALTHCHECKS_NEARLINE_URL`/Datei; Unraid `/mnt/user/appdata/secrets/healthchecks_borg_url`) | `ops/h-drive-nearline/README.md` | | Monitoring Single-File-Bind-Mount Hardening | Operator/Claude | alertmanager/blackbox/loki/promtail + alertmanager-ntfy-bridge lokal auf Directory-Mounts umgestellt (grafana-provisioning war bereits Directory-Mount); `docker compose config` gruen. **Verbleibt:** Push + Komodo-Redeploy des monitoring-Stacks mit `--force-recreate` (Mount-Pfade aendern sich), danach Reload-/Alert-Smoke | `monitoring/docker-compose.yml` | -| Healthchecks self-hosted (interne Jobs) | Operator | **Live seit 2026-06-23** auf `https://hc.kaleschke.info` (Komodo-Stack-ID `6a3acf2ca7867a4fbab9bfc1`, beide Container healthy, Superuser angelegt). Gitea->Komodo-Webhook seit 2026-06-23 aktiv (authentifiziert, Deploy ausgeloest). **Verbleibt nur noch:** interne Jobs (posture-check, restore-tests, Dumps) als Checks verdrahten + ntfy-Integration. Externe Backup-/Host-down-Waechter bleiben auf healthchecks.io-Cloud | `ops/healthchecks/README.md` | +| Healthchecks self-hosted (interne Jobs) | Operator | **Live seit 2026-06-23** auf `https://hc.kaleschke.info` (Komodo-Stack-ID `6a3acf2ca7867a4fbab9bfc1`, beide Container healthy, Superuser angelegt). Gitea->Komodo-Webhook seit 2026-06-23 aktiv. Projekt `KalliLab CORE` + ntfy-Integration (`homelab-alerts`) angelegt. **Verdrahtet + verifiziert (Status `up`):** `posture-check` (stuendlich), `cert-token-check` (taeglich) - je endpoint-agnostischer Ping, Capability-URL als Host-Secret. **Verbleibt (optional, gleiches Muster):** `compose-runtime-drift`, `daily-status-report`, `komodo-stack-hygiene` als Checks nachziehen. 
Externe Backup-/Host-down-Waechter bleiben auf healthchecks.io-Cloud | `ops/healthchecks/README.md` | --- diff --git a/docs/SECRETS_MAP.md b/docs/SECRETS_MAP.md index d037efd..c5a1064 100644 --- a/docs/SECRETS_MAP.md +++ b/docs/SECRETS_MAP.md @@ -57,6 +57,7 @@ Dieses Dokument listet sensible Daten, deren Ablageorte und die vorgesehene Einb | Healthchecks self-hosted | Superuser Login | Komodo Stack-ENV `${HEALTHCHECKS_SUPERUSER_EMAIL}` (Login-Mail) + `${HEALTHCHECKS_SUPERUSER_PASSWORD}`; **Login-Passwort als Host-Datei** `/mnt/user/appdata/secrets/healthchecks_superuser_password.txt` (nach erstem Login aenderbar) | aktiv (2026-06-23) | | Healthchecks self-hosted | Gitea->Komodo Webhook Secret | `/mnt/user/appdata/secrets/healthchecks_webhook_secret.txt` (chmod 600) = per-Stack `webhook_secret` in Komodo; im Gitea-Hook identisch eintragen | aktiv (2026-06-23) | | healthchecks-postgres | DB Password | `/mnt/user/appdata/secrets/healthchecks_postgres_password.txt` -> `POSTGRES_PASSWORD_FILE` | aktiv (2026-06-23) | +| Healthchecks self-hosted (interne Job-Pings) | Ping-/Capability-URLs | je Job eine Datei `/mnt/user/appdata/secrets/healthchecks_<job>_url` (chmod 600), z. B. `healthchecks_posture_url`; gelesen vom jeweiligen Script (`HEALTHCHECKS_<JOB>_URL`/Datei, endpoint-agnostisch wie `pre-borg.sh`). Capability-URL -> wie Secret behandeln, nie ins Repo | aktiv (2026-06-23) | | Unraid Flash Backup | Boot-/Array-/Share-/Plugin-Konfiguration, ggf. 
Hashes/Keys/Templates | `/mnt/user/backups/borg/dumps/latest/unraid-flash-config.tar.gz`, via Borg/Hetzner gesichert | aktiv; wie Secret-Material behandeln |
 | Hermes Agent | Provider-Keys, Bot-Tokens, API-Server-Key | `/mnt/user/appdata/hermes-agent/data/.env` | VM-seitig offen |
 | Hermes Agent | SSH-Runner Private Key | `/mnt/user/appdata/secrets/hermes_runner_id_ed25519` -> `/root/.ssh/id_ed25519` | VM-seitig offen |
diff --git a/services/posture-check/cert-token-check.sh b/services/posture-check/cert-token-check.sh
index a49e942..2af5a07 100755
--- a/services/posture-check/cert-token-check.sh
+++ b/services/posture-check/cert-token-check.sh
@@ -137,8 +137,26 @@ write_json() {
   fi
 }
 
-for domain in $DOMAINS; do
-  check_cert "$domain"
-done
-check_cloudflare_token
-write_json
+# --- Healthchecks heartbeat (endpoint-agnostic; the capability URL is a secret and must never be committed) ---
+HEALTHCHECKS_CERT_TOKEN_URL="${HEALTHCHECKS_CERT_TOKEN_URL:-}"
+HEALTHCHECKS_CERT_TOKEN_URL_FILE="${HEALTHCHECKS_CERT_TOKEN_URL_FILE:-/mnt/user/appdata/secrets/healthchecks_cert_token_url}"
+if [ -z "$HEALTHCHECKS_CERT_TOKEN_URL" ] && [ -r "$HEALTHCHECKS_CERT_TOKEN_URL_FILE" ]; then
+  HEALTHCHECKS_CERT_TOKEN_URL="$(tr -d '[:space:]' < "$HEALTHCHECKS_CERT_TOKEN_URL_FILE")"
+fi
+# hc_ping [SUFFIX]: best-effort GET of URL+SUFFIX; no-op when no URL is set, never fails the script. "%/" drops a trailing slash so a stored ".../" cannot yield "//start".
+hc_ping() {
+  [ -n "$HEALTHCHECKS_CERT_TOKEN_URL" ] || return 0
+  curl -fsS -m 10 --retry 3 "${HEALTHCHECKS_CERT_TOKEN_URL%/}${1:-}" >/dev/null 2>&1 || true
+}
+hc_ping "/start"
+rc=0
+{
+  for domain in $DOMAINS; do
+    check_cert "$domain"
+  done
+  check_cloudflare_token
+  write_json
+} || rc=$?
+# rc is the status of the group's last command. 0/1/2 = ok/warning/critical: the check itself RAN (alerts go out separately via ntfy); only rc>2 -> /fail. NOTE(review): errexit is suppressed left of "||" - confirm nothing above relies on set -e.
+if [ "$rc" -le 2 ]; then hc_ping ""; else hc_ping "/fail"; fi
+exit "$rc"