Consolidate monitoring target stack
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
# Monitoring Stack
|
||||
|
||||
Zielzustand: ein zentraler Observability-Stack fuer KalliLab CORE.
|
||||
|
||||
## Enthaltene Dienste
|
||||
|
||||
- `monitoring-grafana`: zentrale UI unter `https://monitoring.kaleschke.info`
|
||||
- `monitoring-prometheus`: Metriken mit 30 Tagen Retention
|
||||
- `monitoring-loki`: Container-Logs mit 30 Tagen Retention
|
||||
- `monitoring-promtail`: Docker-Log-Discovery ueber read-only Docker-Socket
|
||||
- `monitoring-node-exporter`: Host-Metriken
|
||||
- `monitoring-cadvisor`: Container-Metriken
|
||||
- `monitoring-influxdb3-core`: InfluxDB 3 Core fuer Home-Assistant-/Ecowitt-Langzeitdaten
|
||||
|
||||
Die alten Pfade `ops/loki` und `ops/grafana-influxdb` sind damit abgeloeste Altstaende. Sie bleiben vorerst im Repo als Rollback- und Migrationsreferenz, sollen aber nach erfolgreichem Live-Deploy nicht parallel betrieben werden.
|
||||
|
||||
## Secrets
|
||||
|
||||
Vor dem Deploy muessen diese Host-Dateien existieren:
|
||||
|
||||
```text
|
||||
/mnt/user/appdata/secrets/monitoring_grafana_admin_password.txt
|
||||
/mnt/user/appdata/secrets/monitoring_grafana_influxdb_token.txt
|
||||
/mnt/user/appdata/secrets/influxdb3_admin_token.json
|
||||
```
|
||||
|
||||
Alle Dateien mit Rechten `600` anlegen. Werte niemals ins Git schreiben.
|
||||
|
||||
## Stack Environment
|
||||
|
||||
Sicherer Default: InfluxDB ist nur lokal auf dem Host erreichbar:
|
||||
|
||||
```env
|
||||
INFLUXDB_BIND_IP=127.0.0.1
|
||||
```
|
||||
|
||||
Wenn Home Assistant aus der VM schreiben soll, in Komodo fuer den `monitoring`-Stack setzen:
|
||||
|
||||
```env
|
||||
INFLUXDB_BIND_IP=192.168.178.58
|
||||
```
|
||||
|
||||
## Migration
|
||||
|
||||
1. Secrets anlegen.
|
||||
2. Alten `ops/loki`-Stack stoppen, wenn `monitoring-loki` und `monitoring-promtail` live gehen.
|
||||
3. Alten `ops/grafana-influxdb`-Stack stoppen, sobald `monitoring-influxdb3-core` und `monitoring-grafana` live getestet sind (also erst nach dem Deploy in Schritt 4).
|
||||
4. `monitoring` via Komodo deployen.
|
||||
5. Optionales Dashboard-Bootstrap-Profil einmalig ausfuehren.
|
||||
6. Home Assistant Writer gegen `http://192.168.178.58:8181/` pruefen (setzt `INFLUXDB_BIND_IP=192.168.178.58` voraus, siehe Stack Environment); `401 Unauthorized` ohne Token ist erwartbar.
|
||||
|
||||
## Smoke-Tests
|
||||
|
||||
- `https://monitoring.kaleschke.info` leitet zu Authelia weiter.
|
||||
- Grafana-Datasources `Prometheus`, `Loki` und `InfluxDB 3 Core` testen erfolgreich.
|
||||
- Prometheus Targets: `prometheus`, `node-exporter`, `cadvisor`, `traefik`.
|
||||
- Loki zeigt Container-Logs mit Labels `container`, `compose_project`, `compose_service`.
|
||||
- InfluxDB 3 Core enthaelt die Datenbank `homelab`.
|
||||
@@ -62,15 +62,24 @@ services:
|
||||
environment:
|
||||
GF_SERVER_ROOT_URL: https://monitoring.kaleschke.info/
|
||||
GF_SECURITY_ADMIN_USER: admin
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD}
|
||||
GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/monitoring_grafana_admin_password
|
||||
GF_USERS_ALLOW_SIGN_UP: "false"
|
||||
GF_AUTH_ANONYMOUS_ENABLED: "false"
|
||||
entrypoint:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
export GRAFANA_INFLUXDB_TOKEN="$$(cat /run/secrets/monitoring_grafana_influxdb_token)"
|
||||
exec /run.sh
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
networks:
|
||||
- monitoring_net
|
||||
- frontend_net
|
||||
secrets:
|
||||
- monitoring_grafana_admin_password
|
||||
- monitoring_grafana_influxdb_token
|
||||
expose:
|
||||
- "3000"
|
||||
security_opt:
|
||||
@@ -78,6 +87,7 @@ services:
|
||||
depends_on:
|
||||
- prometheus
|
||||
- loki
|
||||
- influxdb3-core
|
||||
labels:
|
||||
- traefik.enable=true
|
||||
- traefik.docker.network=frontend_net
|
||||
@@ -85,19 +95,15 @@ services:
|
||||
- traefik.http.routers.monitoring-grafana.entrypoints=websecure
|
||||
- traefik.http.routers.monitoring-grafana.tls=true
|
||||
- traefik.http.routers.monitoring-grafana.tls.certresolver=le
|
||||
- traefik.http.routers.monitoring-grafana.middlewares=authelia@docker,secure-headers@file
|
||||
- traefik.http.routers.monitoring-grafana.middlewares=authelia@file,secure-headers@file
|
||||
- traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000
|
||||
# Docker-provider Authelia middleware requested for this stack.
|
||||
- traefik.http.middlewares.authelia.forwardauth.address=http://authelia:9091/api/authz/forward-auth
|
||||
- traefik.http.middlewares.authelia.forwardauth.trustForwardHeader=true
|
||||
- traefik.http.middlewares.authelia.forwardauth.authResponseHeaders=Remote-User,Remote-Groups,Remote-Name,Remote-Email
|
||||
|
||||
grafana-dashboard-importer:
|
||||
image: python:3.13-alpine
|
||||
container_name: monitoring-grafana-dashboard-importer
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD}
|
||||
restart: "no"
|
||||
profiles:
|
||||
- bootstrap
|
||||
dns:
|
||||
- 1.1.1.1
|
||||
- 8.8.8.8
|
||||
@@ -108,6 +114,8 @@ services:
|
||||
- no-new-privileges:true
|
||||
depends_on:
|
||||
- grafana
|
||||
secrets:
|
||||
- monitoring_grafana_admin_password
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
@@ -115,13 +123,13 @@ services:
|
||||
python - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
grafana_url = "http://grafana:3000"
|
||||
password = os.environ["GF_SECURITY_ADMIN_PASSWORD"]
|
||||
with open("/run/secrets/monitoring_grafana_admin_password", encoding="utf-8") as secret:
|
||||
password = secret.read().strip()
|
||||
auth = base64.b64encode(f"admin:{password}".encode()).decode()
|
||||
headers = {
|
||||
"Authorization": f"Basic {auth}",
|
||||
@@ -202,8 +210,7 @@ services:
|
||||
}, dashboard_id)
|
||||
print(f"Imported Grafana dashboard {dashboard_id}")
|
||||
PY
|
||||
echo "Dashboard import complete; keeping sidecar alive for Komodo status."
|
||||
sleep infinity
|
||||
echo "Dashboard import complete."
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:v1.9.1
|
||||
@@ -246,10 +253,37 @@ services:
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
|
||||
# InfluxDB 3 Core service of the monitoring stack.
influxdb3-core:
|
||||
# Image is pinned by both tag and sha256 digest.
image: influxdb:3.9.1-core@sha256:1d58c8b9ac90153ae3a020ede2810c8284933dda50ac71e7573389ab6f012128
|
||||
container_name: monitoring-influxdb3-core
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
# Host bind address is taken from INFLUXDB_BIND_IP; falls back to 127.0.0.1 when the variable is unset or empty.
- "${INFLUXDB_BIND_IP:-127.0.0.1}:8181:8181"
|
||||
command:
|
||||
- influxdb3
|
||||
|
||||
- serve
|
||||
|
||||
- --node-id=kallilabcore
|
||||
|
||||
# File object store; data and plugin directories match the two volume mounts below.
- --object-store=file
|
||||
- --data-dir=/var/lib/influxdb3/data
|
||||
- --plugin-dir=/var/lib/influxdb3/plugins
|
||||
# Admin token is read from the `influxdb3_admin_token` secret listed under `secrets` below.
- --admin-token-file=/run/secrets/influxdb3_admin_token
|
||||
volumes:
|
||||
- influxdb3_data:/var/lib/influxdb3/data
|
||||
- influxdb3_plugins:/var/lib/influxdb3/plugins
|
||||
secrets:
|
||||
- influxdb3_admin_token
|
||||
networks:
|
||||
- monitoring_net
|
||||
- monitoring_influx_lan
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
|
||||
networks:
|
||||
monitoring_net:
|
||||
name: monitoring_net
|
||||
driver: bridge
|
||||
monitoring_influx_lan:
|
||||
driver: bridge
|
||||
frontend_net:
|
||||
external: true
|
||||
|
||||
@@ -258,3 +292,13 @@ volumes:
|
||||
loki_data:
|
||||
promtail_positions:
|
||||
grafana_data:
|
||||
influxdb3_data:
|
||||
influxdb3_plugins:
|
||||
|
||||
secrets:
|
||||
monitoring_grafana_admin_password:
|
||||
file: /mnt/user/appdata/secrets/monitoring_grafana_admin_password.txt
|
||||
monitoring_grafana_influxdb_token:
|
||||
file: /mnt/user/appdata/secrets/monitoring_grafana_influxdb_token.txt
|
||||
influxdb3_admin_token:
|
||||
file: /mnt/user/appdata/secrets/influxdb3_admin_token.json
|
||||
|
||||
@@ -17,3 +17,17 @@ datasources:
|
||||
editable: false
|
||||
jsonData:
|
||||
maxLines: 1000
|
||||
|
||||
# Grafana datasource entry for InfluxDB 3 Core (SQL mode, database `homelab`).
- name: InfluxDB 3 Core
|
||||
uid: monitoring-influxdb3-core
|
||||
type: influxdb
|
||||
access: proxy
|
||||
# Plain-HTTP URL on port 8181; the host name presumably resolves to the compose service of the same name — verify.
url: http://influxdb3-core:8181
|
||||
editable: false
|
||||
jsonData:
|
||||
version: SQL
|
||||
dbName: homelab
|
||||
httpMode: POST
|
||||
# NOTE(review): presumably required because the URL above is plain http (no TLS) — confirm.
insecureGrpc: true
|
||||
secureJsonData:
|
||||
# Placeholder for the GRAFANA_INFLUXDB_TOKEN environment variable; presumably expanded by Grafana provisioning at startup — verify.
token: $GRAFANA_INFLUXDB_TOKEN
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# Safe default: local host only.
|
||||
# Set this to the Unraid LAN IP, for example 192.168.178.58, when a VM such as Home Assistant must write to InfluxDB.
|
||||
INFLUXDB_BIND_IP=127.0.0.1
|
||||
Reference in New Issue
Block a user