Consolidate monitoring target stack

This commit is contained in:
2026-05-17 10:41:29 +02:00
parent 61625a7a1c
commit b7dfdad621
21 changed files with 250 additions and 64 deletions
+58
View File
@@ -0,0 +1,58 @@
# Monitoring Stack
Zielzustand: ein zentraler Observability-Stack fuer KalliLab CORE.
## Enthaltene Dienste
- `monitoring-grafana`: zentrale UI unter `https://monitoring.kaleschke.info`
- `monitoring-prometheus`: Metriken mit 30 Tagen Retention
- `monitoring-loki`: Container-Logs mit 30 Tagen Retention
- `monitoring-promtail`: Docker-Log-Discovery ueber read-only Docker-Socket
- `monitoring-node-exporter`: Host-Metriken
- `monitoring-cadvisor`: Container-Metriken
- `monitoring-influxdb3-core`: InfluxDB 3 Core fuer Home-Assistant-/Ecowitt-Langzeitdaten
Die alten Pfade `ops/loki` und `ops/grafana-influxdb` sind damit abgeloeste Altstaende. Sie bleiben vorerst im Repo als Rollback- und Migrationsreferenz, sollen aber nach erfolgreichem Live-Deploy nicht parallel betrieben werden.
## Secrets
Vor dem Deploy muessen diese Host-Dateien existieren:
```text
/mnt/user/appdata/secrets/monitoring_grafana_admin_password.txt
/mnt/user/appdata/secrets/monitoring_grafana_influxdb_token.txt
/mnt/user/appdata/secrets/influxdb3_admin_token.json
```
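Alle Dateien mit Rechten `600` anlegen und dem Benutzer zuordnen, unter dem der jeweilige Container-Prozess laeuft: Docker Compose bindet dateibasierte Secrets mit Eigentuemer und Rechten der Host-Datei ein, ein nicht als root laufender Dienst kann eine root-eigene `600`-Datei also nicht lesen. Werte niemals ins Git schreiben.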
## Stack Environment
Standardmaessig wird der InfluxDB-Port nur lokal an `127.0.0.1` gebunden (sicherer Default):
```env
INFLUXDB_BIND_IP=127.0.0.1
```
Wenn Home Assistant aus der VM schreiben soll, in Komodo fuer den `monitoring`-Stack stattdessen die LAN-IP des Hosts setzen:
```env
INFLUXDB_BIND_IP=192.168.178.58
```
## Migration
1. Secrets anlegen.
2. Alten `ops/loki`-Stack stoppen, wenn `monitoring-loki` und `monitoring-promtail` live gehen.
3. Alten `ops/grafana-influxdb`-Stack stoppen, wenn `monitoring-influxdb3-core` und `monitoring-grafana` live getestet sind.
4. `monitoring` via Komodo deployen.
5. Optionales Dashboard-Bootstrap-Profil einmalig ausfuehren.
6. Home Assistant Writer gegen `http://192.168.178.58:8181/` pruefen; `401 Unauthorized` ohne Token ist erwartbar.
## Smoke-Tests
- `https://monitoring.kaleschke.info` leitet zur Authelia-Anmeldung weiter.
- Grafana-Datasources `Prometheus`, `Loki` und `InfluxDB 3 Core` testen erfolgreich.
- Prometheus Targets: `prometheus`, `node-exporter`, `cadvisor`, `traefik`.
- Loki zeigt Container-Logs mit Labels `container`, `compose_project`, `compose_service`.
- InfluxDB 3 Core enthaelt die Datenbank `homelab`.
+57 -13
View File
@@ -62,15 +62,24 @@ services:
environment:
GF_SERVER_ROOT_URL: https://monitoring.kaleschke.info/
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD}
GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/monitoring_grafana_admin_password
GF_USERS_ALLOW_SIGN_UP: "false"
GF_AUTH_ANONYMOUS_ENABLED: "false"
entrypoint:
- /bin/sh
- -c
- |
export GRAFANA_INFLUXDB_TOKEN="$$(cat /run/secrets/monitoring_grafana_influxdb_token)"
exec /run.sh
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning:ro
networks:
- monitoring_net
- frontend_net
secrets:
- monitoring_grafana_admin_password
- monitoring_grafana_influxdb_token
expose:
- "3000"
security_opt:
@@ -78,6 +87,7 @@ services:
depends_on:
- prometheus
- loki
- influxdb3-core
labels:
- traefik.enable=true
- traefik.docker.network=frontend_net
@@ -85,19 +95,15 @@ services:
- traefik.http.routers.monitoring-grafana.entrypoints=websecure
- traefik.http.routers.monitoring-grafana.tls=true
- traefik.http.routers.monitoring-grafana.tls.certresolver=le
- traefik.http.routers.monitoring-grafana.middlewares=authelia@docker,secure-headers@file
- traefik.http.routers.monitoring-grafana.middlewares=authelia@file,secure-headers@file
- traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000
# Docker-provider Authelia middleware requested for this stack.
- traefik.http.middlewares.authelia.forwardauth.address=http://authelia:9091/api/authz/forward-auth
- traefik.http.middlewares.authelia.forwardauth.trustForwardHeader=true
- traefik.http.middlewares.authelia.forwardauth.authResponseHeaders=Remote-User,Remote-Groups,Remote-Name,Remote-Email
grafana-dashboard-importer:
image: python:3.13-alpine
container_name: monitoring-grafana-dashboard-importer
restart: unless-stopped
environment:
GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD}
restart: "no"
profiles:
- bootstrap
dns:
- 1.1.1.1
- 8.8.8.8
@@ -108,6 +114,8 @@ services:
- no-new-privileges:true
depends_on:
- grafana
secrets:
- monitoring_grafana_admin_password
command:
- /bin/sh
- -c
@@ -115,13 +123,13 @@ services:
python - <<'PY'
import base64
import json
import os
import time
import urllib.error
import urllib.request
grafana_url = "http://grafana:3000"
password = os.environ["GF_SECURITY_ADMIN_PASSWORD"]
with open("/run/secrets/monitoring_grafana_admin_password", encoding="utf-8") as secret:
password = secret.read().strip()
auth = base64.b64encode(f"admin:{password}".encode()).decode()
headers = {
"Authorization": f"Basic {auth}",
@@ -202,8 +210,7 @@ services:
}, dashboard_id)
print(f"Imported Grafana dashboard {dashboard_id}")
PY
echo "Dashboard import complete; keeping sidecar alive for Komodo status."
sleep infinity
echo "Dashboard import complete."
node-exporter:
image: prom/node-exporter:v1.9.1
@@ -246,10 +253,37 @@ services:
security_opt:
- no-new-privileges:true
# InfluxDB 3 Core service of the monitoring stack (container name
# monitoring-influxdb3-core). Runs `influxdb3 serve` with a file-based
# object store and reads its admin token from a Compose secret.
influxdb3-core:
# Image is pinned by tag and by digest.
image: influxdb:3.9.1-core@sha256:1d58c8b9ac90153ae3a020ede2810c8284933dda50ac71e7573389ab6f012128
container_name: monitoring-influxdb3-core
restart: unless-stopped
ports:
# Publishes port 8181 on the host address given by INFLUXDB_BIND_IP;
# falls back to 127.0.0.1 (host-local only) when the variable is unset or empty.
- "${INFLUXDB_BIND_IP:-127.0.0.1}:8181:8181"
command:
- influxdb3
- serve
- --node-id=kallilabcore
- --object-store=file
# Data and plugin directories match the two named volumes mounted below.
- --data-dir=/var/lib/influxdb3/data
- --plugin-dir=/var/lib/influxdb3/plugins
# Path of the `influxdb3_admin_token` secret listed under `secrets` below.
- --admin-token-file=/run/secrets/influxdb3_admin_token
volumes:
- influxdb3_data:/var/lib/influxdb3/data
- influxdb3_plugins:/var/lib/influxdb3/plugins
secrets:
- influxdb3_admin_token
networks:
- monitoring_net
# NOTE(review): presumably a dedicated bridge for the host-published port — confirm intent.
- monitoring_influx_lan
security_opt:
- no-new-privileges:true
networks:
monitoring_net:
name: monitoring_net
driver: bridge
monitoring_influx_lan:
driver: bridge
frontend_net:
external: true
@@ -258,3 +292,13 @@ volumes:
loki_data:
promtail_positions:
grafana_data:
influxdb3_data:
influxdb3_plugins:
# File-backed Compose secrets. Each entry points to a file on the host that
# must exist before the stack is deployed; the values themselves are never
# committed to Git.
secrets:
# Grafana admin password; used by the grafana service and the dashboard importer.
monitoring_grafana_admin_password:
file: /mnt/user/appdata/secrets/monitoring_grafana_admin_password.txt
# Token exported as GRAFANA_INFLUXDB_TOKEN by the grafana entrypoint.
monitoring_grafana_influxdb_token:
file: /mnt/user/appdata/secrets/monitoring_grafana_influxdb_token.txt
# Admin token file passed to influxdb3-core via --admin-token-file.
influxdb3_admin_token:
file: /mnt/user/appdata/secrets/influxdb3_admin_token.json
@@ -17,3 +17,17 @@ datasources:
editable: false
jsonData:
maxLines: 1000
# Provisioned Grafana datasource for the InfluxDB 3 Core service, addressed
# at influxdb3-core:8181 with database `homelab`. Marked as not editable.
- name: InfluxDB 3 Core
uid: monitoring-influxdb3-core
type: influxdb
access: proxy
url: http://influxdb3-core:8181
editable: false
jsonData:
# NOTE(review): presumably selects the SQL query language of the InfluxDB datasource plugin — confirm against the Grafana docs.
version: SQL
dbName: homelab
httpMode: POST
# NOTE(review): presumably permits an unencrypted gRPC connection, matching the plain-http URL above — confirm.
insecureGrpc: true
secureJsonData:
# Placeholder for the GRAFANA_INFLUXDB_TOKEN environment variable, which the
# grafana container entrypoint exports from
# /run/secrets/monitoring_grafana_influxdb_token before starting Grafana.
token: $GRAFANA_INFLUXDB_TOKEN
+3
View File
@@ -0,0 +1,3 @@
# Safe default: local host only.
# Set this to the Unraid LAN IP, for example 192.168.178.58, when a VM such as Home Assistant must write to InfluxDB.
INFLUXDB_BIND_IP=127.0.0.1