# Source commit: cbfbb8ca4f
#
# Self-hosted Healthchecks (ops/healthchecks/) as the hub for internal
# cron/job heartbeats. The three host-down/backup watchdogs (Borg pre-hook,
# baerchen nearline pull, monitoring watchdog #8) deliberately stay on
# healthchecks.io cloud, since an on-host watcher cannot report a host outage.
#
# - frontend_net + dedicated PostgreSQL 18 in healthchecks_internal
# - native Healthchecks auth; ping/API exempt from Authelia (n8n/Komodo pattern)
# - registered as middleware_exempt in ops/policy-checks/exceptions.json
# - docs: DECISIONS, ARCHITECTURE (3.1/4.2/7.6/10), SERVICE_CATALOG,
#   SECRETS_MAP, MASTER_TODO, README index
#
# docker compose config validated (exit 0). Not yet deployed: host secret
# file, appdata dir, Komodo stack + ENV and Gitea webhook remain operator steps.
#
# Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
#
# File: 125 lines, 4.1 KiB, YAML
name: healthchecks

# Self-hosted Healthchecks (dead man's switch / cron heartbeat monitor).
#
# SCOPE (deliberate): hub for the many INTERNAL jobs/scripts that should
# report from a running host "did job X run today?" (posture-check,
# restore-tests, pre-backup-dumps, gitea-bundle-mirror, ...).
#
# NOT here: the host-down / backup-silence watchdogs (Borg pre-hook,
# baerchen nearline pull, monitoring watchdog #8) deliberately stay EXTERNAL
# on healthchecks.io cloud. A watchdog on the same host cannot report a
# host outage (it is dead itself by then). See ops/healthchecks/README.md.

services:
  # Healthchecks web application; published through Traefik on frontend_net
  # and connected to its database over healthchecks_internal.
  healthchecks:
    image: healthchecks/healthchecks:v4.2@sha256:6b5f593d40994345053f05f86decfa9e17ab1e4422df2ae58abd032a7b14d8f6
    container_name: healthchecks
    restart: unless-stopped

    # The ntfy integration uses the public Traefik URL; container DNS does not
    # resolve ntfy.kaleschke.info otherwise (same pattern as mealie/komodo).
    extra_hosts:
      - "ntfy.kaleschke.info:192.168.178.58"

    environment:
      TZ: Europe/Berlin
      DEBUG: "False"
      SITE_ROOT: https://hc.kaleschke.info
      SITE_NAME: KalliLab Healthchecks
      # localhost is listed so the container healthcheck below, which requests
      # http://localhost:8000/, passes host validation.
      ALLOWED_HOSTS: hc.kaleschke.info,localhost
      REGISTRATION_OPEN: "False"

      DB: postgres
      DB_HOST: healthchecks-postgres
      DB_PORT: "5432"
      DB_NAME: healthchecks
      DB_USER: healthchecks
      # NOTE(review): the database reads its password from the
      # healthchecks_postgres_password secret file, while the app reads this
      # variable; both presumably must hold the same value - confirm.
      DB_PASSWORD: ${HEALTHCHECKS_DB_PASSWORD}

      # NOTE(review): an unset variable interpolates to an empty string
      # (Compose only warns). Consider ${VAR:?message} if deployment should
      # fail fast instead - confirm it does not break config validation in CI.
      SECRET_KEY: ${HEALTHCHECKS_SECRET_KEY}

      # The initial admin is created/updated at startup. Values are supplied
      # only as Komodo stack ENV, never in the repo. SMTP is deliberately not
      # configured (login via superuser password, notifications via the ntfy
      # integration).
      SUPERUSER_EMAIL: ${HEALTHCHECKS_SUPERUSER_EMAIL}
      SUPERUSER_PASSWORD: ${HEALTHCHECKS_SUPERUSER_PASSWORD}

    networks:
      - frontend_net
      - healthchecks_internal

    # Start only once the database reports healthy (pg_isready succeeds).
    depends_on:
      healthchecks-postgres:
        condition: service_healthy

    healthcheck:
      # Requests the local web server and exits 0 only on HTTP 200.
      # The folded scalar joins the lines with single spaces, yielding the
      # command as one line without backslash-escaped quotes.
      test:
        - CMD-SHELL
        - >-
          python -c "import urllib.request,sys;
          sys.exit(0 if
          urllib.request.urlopen('http://localhost:8000/', timeout=5).status==200
          else 1)"
      interval: 60s
      timeout: 10s
      retries: 5
      start_period: 60s

    security_opt:
      - "no-new-privileges:true"

    labels:
      # Traefik with native Healthchecks auth, deliberately WITHOUT a blanket
      # authelia@file: the ping endpoints (/ping/*) and the API must be
      # reachable without ForwardAuth, otherwise cron jobs cannot report
      # (same exemption logic as n8n/Komodo). The dashboard is protected by
      # Healthchecks' own login.
      - traefik.enable=true
      - traefik.docker.network=frontend_net
      - traefik.http.routers.healthchecks.rule=Host(`hc.kaleschke.info`)
      - traefik.http.routers.healthchecks.entrypoints=websecure
      - traefik.http.routers.healthchecks.tls=true
      - traefik.http.routers.healthchecks.tls.certresolver=le
      - traefik.http.routers.healthchecks.middlewares=secure-headers@file
      - traefik.http.services.healthchecks.loadbalancer.server.port=8000

  # Dedicated PostgreSQL instance for Healthchecks; attached only to the
  # internal network.
  healthchecks-postgres:
    image: postgres:18.4@sha256:29ee7bb30d804447dc9a91fd0d74322ae1dc3a4072cc6346f70a5ed6e783b565
    container_name: healthchecks-postgres
    restart: unless-stopped

    environment:
      TZ: Europe/Berlin
      POSTGRES_USER: healthchecks
      POSTGRES_DB: healthchecks
      # Password is read from the mounted secret file, not from the environment.
      POSTGRES_PASSWORD_FILE: /run/secrets/healthchecks_postgres_password
      # Data directory lives below the mounted /var/lib/postgresql volume.
      PGDATA: /var/lib/postgresql/18/docker

    volumes:
      - /mnt/user/appdata/healthchecks/postgres18:/var/lib/postgresql

    networks:
      - healthchecks_internal

    secrets:
      - healthchecks_postgres_password

    expose:
      - "5432"

    healthcheck:
      # "$$" escapes Compose interpolation so the variables are expanded by
      # the shell inside the container.
      test:
        - CMD-SHELL
        - 'pg_isready -U "$${POSTGRES_USER}" -d "$${POSTGRES_DB}"'
      interval: 10s
      timeout: 10s
      retries: 5
      start_period: 30s

    security_opt:
      - "no-new-privileges:true"

networks:
  # Pre-existing network that Traefik routes over (see the
  # traefik.docker.network label); it is not created by this stack.
  frontend_net:
    external: true
  # Private bridge between the app and its database; "internal" keeps it
  # isolated from external networks.
  healthchecks_internal:
    driver: bridge
    internal: true

secrets:
  # File-based secret read from the host path below; it is mounted into
  # healthchecks-postgres and referenced there via POSTGRES_PASSWORD_FILE.
  healthchecks_postgres_password:
    file: /mnt/user/appdata/secrets/healthchecks_postgres_password.txt