Add Loki Alloy logging baseline
This commit is contained in:
@@ -9,7 +9,7 @@ Monitoring-Stack fuer Grafana + InfluxDB 3 Core. InfluxDB bleibt ohne Public Rou
|
||||
- Grafana wird ueber Traefik + `authelia@file,secure-headers@file` unter `grafana.kaleschke.info` veroeffentlicht.
|
||||
- InfluxDB bleibt ohne Traefik-Route. Der HTTP-Port `8181` kann fuer interne Writer wie Home Assistant ueber `INFLUXDB_BIND_IP` auf eine LAN-Adresse gebunden werden; Default ist `127.0.0.1`.
|
||||
- InfluxDB haengt an zwei Compose-Netzen: `grafana_influx_internal` fuer Grafana und `grafana_influx_lan` fuer das Docker Host-Port-Publishing. Im laufenden Komodo-Stack heissen sie durch den Compose-Projektpraefix `grafana_grafana_influx_internal` und `grafana_grafana_influx_lan`. InfluxDB haengt bewusst nicht im `frontend_net`.
|
||||
- Grafana provisioning legt eine SQL-Datenquelle fuer InfluxDB 3 Core mit der Datenbank `homelab` an.
|
||||
- Grafana-Provisioning legt eine SQL-Datenquelle fuer InfluxDB 3 Core mit der Datenbank `homelab` und eine Loki-Datenquelle fuer Container-Logs an.
|
||||
- Der Grafana-Datasource-Token liegt als Secret-Datei auf dem Host und wird beim Containerstart nur containerintern in die fuer Grafana-Provisioning noetige Environment-Variable geladen.
|
||||
- Home Assistant schreibt mit der InfluxDB-v2-API-Kompatibilitaet nach InfluxDB 3; Details: `docs/HOME_ASSISTANT_INFLUXDB_ECOWITT.md`.
|
||||
|
||||
@@ -39,12 +39,15 @@ Monitoring-Stack fuer Grafana + InfluxDB 3 Core. InfluxDB bleibt ohne Public Rou
|
||||
install -m 600 /dev/null /mnt/user/appdata/secrets/grafana_influxdb_token.txt
|
||||
```
|
||||
|
||||
4. Provisioning-Datei aus dem Git-Checkout auf den Host-Appdata-Pfad kopieren:
|
||||
4. Provisioning-Dateien aus dem Git-Checkout auf den Host-Appdata-Pfad kopieren:
|
||||
|
||||
```bash
|
||||
mkdir -p /mnt/user/appdata/grafana/provisioning/datasources
|
||||
mkdir -p /mnt/user/appdata/grafana/provisioning/dashboards
|
||||
cp /mnt/user/appdata/komodo/core/repos/homelab-infra/ops/grafana-influxdb/provisioning/datasources/influxdb.yml /mnt/user/appdata/grafana/provisioning/datasources/influxdb.yml
|
||||
cp /mnt/user/appdata/komodo/core/repos/homelab-infra/ops/grafana-influxdb/provisioning/dashboards/* /mnt/user/appdata/grafana/provisioning/dashboards/
|
||||
chmod 644 /mnt/user/appdata/grafana/provisioning/datasources/influxdb.yml
|
||||
chmod 644 /mnt/user/appdata/grafana/provisioning/dashboards/*
|
||||
```
|
||||
|
||||
5. Nach dem ersten Start die Datenbank anlegen:
|
||||
@@ -57,6 +60,8 @@ Monitoring-Stack fuer Grafana + InfluxDB 3 Core. InfluxDB bleibt ohne Public Rou
|
||||
|
||||
- `https://grafana.kaleschke.info` oeffnet nach Authelia die Grafana-Loginseite.
|
||||
- Grafana `Connections -> Data sources -> InfluxDB 3 Core -> Save & test` ist erfolgreich.
|
||||
- Grafana `Connections -> Data sources -> Loki -> Save & test` ist erfolgreich, sobald der Loki/Alloy-Stack laeuft.
|
||||
- Die provisionierten Dashboards `Last 60 min before now`, `Restart Events` und `Container Error Rate` sind im Ordner `KalliLab Observability` sichtbar.
|
||||
- InfluxDB bleibt ohne Public Route. Falls `INFLUXDB_BIND_IP` auf die LAN-IP gesetzt ist, ist Port `8181` nur im internen Netz fuer Writer wie Home Assistant erreichbar.
|
||||
- `docker ps` zeigt fuer `influxdb3-core` `192.168.178.58:8181->8181/tcp` oder den per `INFLUXDB_BIND_IP` gesetzten Host.
|
||||
- `ss -ltnp | grep 8181` zeigt einen Listener auf der gebundenen Host-IP.
|
||||
|
||||
@@ -26,6 +26,7 @@ services:
|
||||
- grafana_influxdb_token
|
||||
networks:
|
||||
- frontend_net
|
||||
- backend_net
|
||||
- grafana_influx_internal
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
@@ -82,6 +83,8 @@ secrets:
|
||||
networks:
|
||||
frontend_net:
|
||||
external: true
|
||||
backend_net:
|
||||
external: true
|
||||
grafana_influx_lan:
|
||||
driver: bridge
|
||||
grafana_influx_internal:
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"uid": "kallilab-container-error-rate",
|
||||
"title": "Container Error Rate",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "5m",
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "table",
|
||||
"title": "Container Errors Last 24h",
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "sum by (container_name) (count_over_time({platform=\"docker\"} |~ \"(?i)(level=error|error|fatal|panic)\" [24h]))"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 16, "w": 24, "x": 0, "y": 0 }
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"uid": "kallilab-logs-last-60m",
|
||||
"title": "Last 60 min before now",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-60m", "to": "now" },
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "container",
|
||||
"type": "query",
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"query": "label_values(container_name)",
|
||||
"includeAll": true,
|
||||
"allValue": ".+",
|
||||
"refresh": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "logs",
|
||||
"title": "Docker Log Stream",
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "{platform=\"docker\", container_name=~\"$container\"}"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 20, "w": 24, "x": 0, "y": 0 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": true,
|
||||
"wrapLogMessage": false,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: KalliLab Observability
|
||||
orgId: 1
|
||||
folder: KalliLab Observability
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 60
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"uid": "kallilab-restart-events",
|
||||
"title": "Restart Events",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "5m",
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "heatmap",
|
||||
"title": "Restart-like Log Events",
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "sum by (container_name) (count_over_time({platform=\"docker\"} |~ \"(?i)(restart|restarting|started|exited|oom)\" [5m]))"
|
||||
}
|
||||
],
|
||||
"gridPos": { "h": 16, "w": 24, "x": 0, "y": 0 }
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -16,3 +16,11 @@ datasources:
|
||||
insecureGrpc: true
|
||||
secureJsonData:
|
||||
token: $GRAFANA_INFLUXDB_TOKEN
|
||||
- name: Loki
|
||||
uid: loki
|
||||
type: loki
|
||||
access: proxy
|
||||
url: http://loki:3100
|
||||
isDefault: false
|
||||
jsonData:
|
||||
maxLines: 1000
|
||||
|
||||
Reference in New Issue
Block a user