# Path: homelab-infra/ops/hermes-agent/services.yaml

# services.yaml — Maschinenlesbare Wissensbasis fuer Hermes Alert Enrichment
#
# Abgeleitet aus docs/SERVICE_CATALOG.md
# Stand: 2026-05-31
#
# Zweck: Hermes laedt diese Datei beim Alert-Anreichern, um Abhaengigkeiten,
# Dump-Zeitstempel und den ersten Diagnoseschritt nachzuschlagen.
#
# Felder:
# description - Kurzbeschreibung des Dienstes
# tier - Kritikalitaet: 1=Control Plane, 2=User Apps, 3=Ops/Tools
# category - core | security | infra | app | ops
# container_name - exakter Docker-Containername (fuer docker inspect)
# dependencies - Liste direkter Laufzeit-Abhaengigkeiten (andere Service-Keys)
# url - oeffentliche URL (null = intern/LAN only)
# dump_file - Dateiname in /mnt/user/backups/borg/dumps/latest/ (null = kein Dump)
# data_paths - kritische Datenpfade auf dem Host
# first_check - erster Diagnoseschritt bei Ausfall (Freitext fuer Hermes)
# notes - betriebliche Hinweise und dokumentierte Ausnahmen
# Shared base paths.
meta:
  # Directory that holds the per-service dump files named by `dump_file`.
  dump_base: /mnt/user/backups/borg/dumps/latest
  # Common prefix of most `data_paths` entries in the service catalog.
  appdata_base: /mnt/user/appdata
  # NOTE(review): presumably the host directory holding secret files — confirm.
  secrets_path: /mnt/user/appdata/secrets
# Service catalog, keyed by service name. Entries in `dependencies` refer to
# other keys of this mapping.
services:
  # ---------------------------------------------------------------------------
  # TIER 1 — Control Plane (an outage blocks everything below it)
  # ---------------------------------------------------------------------------
  traefik:
    description: Zentraler Reverse Proxy, TLS, Docker-Label-Routing
    tier: 1
    category: core
    container_name: traefik
    dependencies: []
    url: https://traefik.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/traefik/dynamic
      - /mnt/user/appdata/traefik/letsencrypt
    first_check: "Host-Ports 80/443 erreichbar? dynamic/ korrekt auf Host synchronisiert?"
    notes: "dynamic configs werden NICHT automatisch von Komodo deployed — manueller Host-Sync noetig"

  adguard:
    description: DNS-Server / LAN DNS
    tier: 1
    category: core
    container_name: adguard
    dependencies:
      - unbound
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/adguard/conf
      - /mnt/user/appdata/adguard/work
    first_check: "Port 53 erreichbar? Unbound healthy? dns_net Konnektivitaet?"
    notes: "Ports 53 und 8082 dokumentierte Host-Port-Ausnahmen"

  unbound:
    description: Upstream DNS Resolver fuer AdGuard
    tier: 1
    category: core
    container_name: unbound
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/unbound/config
    first_check: "dns_net Konnektivitaet pruefen; Container-Logs auf Fehler pruefen"
    notes: "rebuildbar; isoliert in dns_net"

  tailscale:
    description: VPN / Remote-Zugang
    tier: 1
    category: core
    container_name: tailscale
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/tailscale
    first_check: "Tailscale Status auf Host pruefen; State-Datei fuer Key-Renewal vorhanden?"
    notes: "network_mode: host; NET_ADMIN, NET_RAW, /dev/net/tun — dokumentierte VPN-Ausnahmen"

  gitea:
    description: Git-Server — operative Quelle der Wahrheit fuer GitOps
    tier: 1
    category: core
    container_name: gitea
    dependencies:
      - traefik
    url: https://git.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/services/gitea/data
    first_check: "HTTPS erreichbar? SQLite in /data intakt? SSH-Port 222 erreichbar?"
    notes: "SQLite in /data — kein separater Dump; ohne externen Mirror im DR kritisch"

  authelia:
    description: ForwardAuth — zentrale Authentifizierung fuer Admin-UIs
    tier: 1
    category: security
    container_name: authelia
    dependencies:
      - postgresql17
      - traefik
    url: https://auth.kaleschke.info
    dump_file: postgresql17-authelia.dump
    data_paths:
      - /mnt/user/appdata/authelia/config
    first_check: "PostgreSQL healthy? SMTP via GMX erreichbar? Host-Config aktuell (Repo-Baseline != Host)?"
    notes: "kein Redis-Session-Backend; SMTP-Notifier GMX; Repo-Baseline muss manuell in Host-Config gemerged werden"

  vaultwarden:
    description: Passwort-Tresor
    tier: 1
    category: security
    container_name: vaultwarden
    dependencies:
      - traefik
    url: https://vault.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/vaultwarden
    first_check: "HTTPS erreichbar? Appdata-Volume intakt?"
    notes: "ADMIN_TOKEN_FILE; keine direkten Host-Ports"

  # The key and container name keep the historical "17" although the
  # description and data path refer to PostgreSQL 18.
  postgresql17:
    description: Shared PostgreSQL 18 Cluster (historischer Containername; Authelia, Paperless, Mail-Archiver)
    tier: 1
    category: infra
    container_name: postgresql17
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/postgresql18
    first_check: "backend_net Konnektivitaet? Disk-Space auf /mnt/user/appdata? pg_isready im Container?"
    notes: "Dumps per Dienst unter dumps/latest; raw DB nicht primaerer Restore-Weg; alter PG17-Pfad bleibt nur Rollback-Altstand"

  komodo-core:
    description: GitOps UI / API / Stack-Manager
    tier: 1
    category: ops
    container_name: komodo-core
    dependencies:
      - komodo-mongo
      - gitea
      - traefik
    url: https://komodo.kaleschke.info
    dump_file: komodo-mongo.archive.gz
    data_paths:
      - /mnt/user/appdata/komodo/core
    first_check: "MongoDB healthy? Gitea erreichbar? komodo_net Konnektivitaet?"
    notes: "keine pauschale Authelia-ForwardAuth; Gitea DNS override konfiguriert"

  komodo-mongo:
    description: Komodo Datenbank (MongoDB)
    tier: 1
    category: infra
    container_name: komodo-mongo
    dependencies: []
    url: null
    dump_file: komodo-mongo.archive.gz
    data_paths:
      - /mnt/user/appdata/komodo/mongo
    first_check: "komodo_net Konnektivitaet? Disk-Space? mongosh ping?"
    notes: "Dump-Integritaet nach Major-Upgrades pruefen"

  komodo-periphery:
    description: Komodo Host-Agent (Stack-Deployments)
    tier: 1
    category: ops
    container_name: komodo-periphery
    dependencies:
      - komodo-core
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/komodo/periphery
    first_check: "Docker-Socket lesbar? /mnt/user/services gemountet? komodo_net Verbindung zu Core?"
    notes: "Docker-Socket-Ausnahme dokumentiert; /mnt/user/services Mount fuer Stack-Workspaces"

  # ---------------------------------------------------------------------------
  # TIER 2 — User Apps
  # ---------------------------------------------------------------------------
  redis:
    description: Shared Redis Cache (Paperless, weitere)
    tier: 2
    category: infra
    container_name: redis
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/redis
    first_check: "backend_net Konnektivitaet? redis-cli ping erreichbar?"
    notes: "transiente Daten; bewusst nicht Backup-kritisch"

  paperless-ngx:
    description: Dokumentenmanagement
    tier: 2
    category: app
    container_name: paperless-ngx
    dependencies:
      - postgresql17
      - redis
      - traefik
    url: https://paperless.kaleschke.info
    dump_file: postgresql17-paperless.dump
    data_paths:
      - /mnt/user/appdata/paperless-ngx/data
      - /mnt/user/documents/paperless
      - /mnt/user/documents/scans_inbox
    first_check: "Redis healthy? PostgreSQL healthy? backend_net Konnektivitaet?"
    notes: "DB/Redis Secrets als Stack ENV (keine _FILE Variante)"

  paperless-gpt:
    description: KI-Ergaenzung fuer Paperless (OCR/Tagging via LLM)
    tier: 2
    category: app
    container_name: paperless-gpt
    dependencies:
      - paperless-ngx
      - traefik
    url: https://paperless-gpt.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/paperless-gpt/data
      - /mnt/user/appdata/paperless-gpt/prompts
    first_check: "Paperless API erreichbar? LLM/Ollama erreichbar? API Token gesetzt?"
    notes: "API Token als Stack ENV; abhaengig von laufendem Paperless"

  immich_server:
    description: Foto-/Video-App
    tier: 2
    category: app
    container_name: immich_server
    dependencies:
      - immich_postgres
      - immich_redis
      - immich_machine_learning
      - traefik
    url: https://immich.kaleschke.info
    dump_file: immich.dump
    data_paths:
      - /mnt/user/photos/immich
      - /mnt/user/photos/family_archive
    first_check: "immich_postgres healthy? immich_redis healthy? ML-Container healthy? immich_default Netz?"
    notes: "native App-Auth; externes Fotoarchiv gemountet"

  immich_postgres:
    description: Immich-Datenbank
    tier: 2
    category: infra
    container_name: immich_postgres
    dependencies: []
    url: null
    dump_file: immich.dump
    data_paths:
      - /mnt/user/appdata/immich_postgres_vectorchord
    first_check: "immich_default Netz? Disk-Space? pg_isready?"
    notes: "PG14 mit VectorChord/pgvector; nie ins frontend_net; immich_default Netz isoliert; alter immich_postgres-Pfad bleibt nur Rollback-Altstand"

  immich_redis:
    description: Immich Cache
    tier: 2
    category: infra
    container_name: immich_redis
    dependencies: []
    url: null
    dump_file: null
    data_paths: []
    first_check: "immich_default Netz? redis-cli ping?"
    notes: "rebuildbar; anonymes Volume — named volume als offenes TODO"

  immich_machine_learning:
    description: Immich ML (Gesichtserkennung, Suche)
    tier: 2
    category: infra
    container_name: immich_machine_learning
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      # A volume name (see first_check), not a host path like other entries.
      - model-cache
    first_check: "immich_default Netz? model-cache Volume vorhanden?"
    notes: "rebuildbar; intern-only"

  mealie:
    description: Rezeptverwaltung
    tier: 2
    category: app
    container_name: mealie
    dependencies:
      - mealie-postgres
      - traefik
    url: https://mealie.kaleschke.info
    dump_file: mealie.dump
    data_paths:
      - /mnt/user/appdata/mealie/data
    first_check: "mealie-postgres healthy? mealie_internal Netz erreichbar?"
    notes: "App + DB in internem Netz getrennt (mealie_internal)"

  mealie-postgres:
    description: Mealie-Datenbank
    tier: 2
    category: infra
    container_name: mealie-postgres
    dependencies: []
    url: null
    dump_file: mealie.dump
    data_paths:
      - /mnt/user/appdata/mealie/postgres18
    first_check: "mealie_internal Netz? Disk-Space?"
    notes: "interne DB; mealie_internal Netz"

  mail-archiver:
    description: Mail-Archivierung (IMAP)
    tier: 2
    category: app
    container_name: mail-archiver
    dependencies:
      - postgresql17
      - authelia
      - traefik
    url: https://mail.kaleschke.info
    dump_file: postgresql17-mailarchiver.dump
    data_paths:
      - /mnt/user/appdata/mailarchiver/data-protection-keys
    first_check: "PostgreSQL healthy? Internet-/IMAP-Zugang? Authelia healthy?"
    notes: "Hybrid: frontend_net fuer IMAP/Internet, backend_net fuer DB"

  nextcloud:
    description: Datei-/Cloud-Dienst
    tier: 2
    category: app
    container_name: nextcloud
    dependencies:
      - nextcloud-postgres
      - nextcloud-redis
      - traefik
    url: https://cloud.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/nextcloud/html
      - /mnt/user/documents/nextcloud-data
    first_check: "nextcloud-postgres healthy? nextcloud-redis healthy? nextcloud_internal Netz?"
    notes: "native App-Auth (kein zentrales ForwardAuth); WebDAV/CardDAV beachten"

  nextcloud-postgres:
    description: Nextcloud-Datenbank
    tier: 2
    category: infra
    container_name: nextcloud-postgres
    dependencies: []
    url: null
    # NOTE(review): no dump is registered for this database, unlike
    # mealie-postgres and immich_postgres — confirm this is intentional.
    dump_file: null
    data_paths:
      - /mnt/user/appdata/nextcloud/postgres18
    first_check: "nextcloud_internal Netz? Disk-Space?"
    notes: "interne DB"

  nextcloud-redis:
    description: Nextcloud Cache / Locking
    tier: 2
    category: infra
    container_name: nextcloud-redis
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/nextcloud/redis
    first_check: "nextcloud_internal Netz? redis-cli ping?"
    notes: "rebuildbar"

  ntfy:
    description: Push-Benachrichtigungen (Alert-Backbone)
    tier: 2
    category: app
    container_name: ntfy
    dependencies:
      - traefik
    url: https://ntfy.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/ntfy
    first_check: "HTTPS erreichbar? NTFY_BEHIND_PROXY=true gesetzt? Traefik healthy?"
    notes: "KRITISCH: Ausfall bedeutet keine anderen Alerts ankommen; Monitoring/Borg-Benachrichtigungen"

  # ---------------------------------------------------------------------------
  # TIER 3 — Ops / Tools (an outage hurts but blocks nothing critical)
  # ---------------------------------------------------------------------------
  glance:
    description: Homelab-Dashboard
    tier: 3
    category: ops
    container_name: glance
    dependencies:
      - traefik
    url: https://glance.kaleschke.info
    dump_file: null
    data_paths: []
    first_check: "Traefik erreichbar? Docker-Socket-Proxy intern erreichbar? API-Tokens fuer Widgets gueltig?"
    notes: "aktives Homelab-Dashboard; Homepage wurde entfernt"

  monitoring-grafana:
    description: Zentrale Observability-UI
    tier: 3
    category: ops
    container_name: monitoring-grafana
    dependencies:
      # NOTE(review): monitoring-prometheus and monitoring-loki have no entry
      # in the visible part of this catalog — confirm they are defined, or a
      # dependency lookup for them will find nothing.
      - monitoring-prometheus
      - monitoring-loki
      - monitoring-influxdb3-core
      - traefik
    url: https://monitoring.kaleschke.info
    dump_file: null
    data_paths:
      # NOTE(review): not an absolute host path; presumably a named volume.
      - grafana_data
    first_check: "Authelia-Redirect? Datasources Prometheus, Loki und InfluxDB 3 Core gruen?"
    notes: "ersetzt alten Grafana-Altstand und Uptime-Kuma-Views"

  monitoring-influxdb3-core:
    description: Zeitreihen- / Metrikdaten fuer Monitoring und Home Assistant
    tier: 3
    category: ops
    container_name: monitoring-influxdb3-core
    # Intentionally empty: monitoring-grafana depends on this service, so a
    # back-reference to Grafana here would form a dependency cycle.
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/influxdb3/data
      - /mnt/user/appdata/influxdb3/plugins
    first_check: "LAN-Port 8181 erreichbar? 401 ohne Token = OK (erwartet). Disk-Space?"
    notes: "LAN-only Host-Port 8181; kein frontend_net; laeuft als user 0"

  scrutiny:
    description: Laufwerks- / SMART-Monitoring
    tier: 3
    category: ops
    container_name: scrutiny
    dependencies:
      - traefik
    url: https://scrutiny.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/scrutiny/config
      - /mnt/user/appdata/scrutiny/influxdb
    first_check: "Device-Mounts vorhanden? privileged=true gesetzt? Traefik erreichbar?"
    notes: "privileged: true dokumentierte Ausnahme"

  glances:
    description: System- / Container-Monitoring
    tier: 3
    category: ops
    container_name: glances
    dependencies:
      - traefik
    url: https://glances.kaleschke.info
    dump_file: null
    data_paths: []
    first_check: "Docker-Socket lesbar? rootfs gemountet? Traefik erreichbar?"
    notes: "rebuildbar; Docker-Socket und rootfs Mounts"

  borg-ui:
    description: Borg Backup- / Restore UI
    tier: 3
    category: ops
    container_name: borg-ui
    dependencies:
      - traefik
    url: https://borg.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/borg-ui/data
      - /mnt/user/backups/borg/dumps
    first_check: "Borg-Repo-Credentials vorhanden? Backup-Mounts erreichbar? Traefik healthy?"
    notes: "breite Mounts bewusst dokumentiert; /local/secrets im DR-Scope"

  hermes-gateway:
    description: Hermes Agent Gateway / AI Ops Assistant
    tier: 3
    category: ops
    container_name: hermes-gateway
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/hermes-agent/data
    first_check: "hermes_net:8642/health erreichbar? SSH-Key gemountet? LLM-Provider erreichbar?"
    notes: "kein Docker-Socket; SSH terminal backend; echte .env auf Host-Appdata"

  ddns-updater:
    description: Cloudflare / DDNS Aktualisierung
    tier: 3
    category: infra
    container_name: ddns-updater
    dependencies: []
    url: null
    dump_file: null
    data_paths:
      - /mnt/user/appdata/ddns-updater
    first_check: "Internetzugang? Cloudflare API erreichbar? Config vorhanden?"
    notes: "bewusst in frontend_net weil backend_net internal ist"

  code-server:
    description: Web-Editor / Operations Workspace
    tier: 3
    category: ops
    container_name: code-server
    dependencies:
      - traefik
    url: https://code.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/code-server
      - /mnt/user/services/dev
    first_check: "Traefik erreichbar? PASSWORD_FILE lesbar?"
    notes: "PASSWORD_FILE; Workspaces bei Restore beachten"

  filebrowser:
    description: Datei-Browser fuer Appdata
    tier: 3
    category: ops
    container_name: filebrowser
    dependencies:
      - traefik
    url: https://files.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/filebrowser
    first_check: "Appdata-Mounts erreichbar? Traefik healthy?"
    notes: "breiter /mnt/user/appdata Mount; Einschraenkung langfristig als TODO"

  speedtest-tracker:
    description: Speedtest-Monitoring
    tier: 3
    category: ops
    container_name: speedtest-tracker
    dependencies:
      - traefik
    url: https://speedtest.kaleschke.info
    dump_file: null
    data_paths:
      - /mnt/user/appdata/speedtest-tracker/config
    first_check: "APP_KEY gesetzt? Internetzugang fuer Speedtest vorhanden?"
    notes: "APP_KEY, ADMIN_PASSWORD als Stack ENV"

  bentopdf:
    description: PDF-Tooling
    tier: 3
    category: app
    container_name: bentopdf
    dependencies:
      - traefik
    url: https://pdf.kaleschke.info
    dump_file: null
    data_paths: []
    first_check: "COOP/COEP Middleware gesetzt? Traefik healthy?"
    notes: "rebuildbar; keine kritische Persistenz; Live-Status pruefen"