Prepare final homelab cleanup gates
This commit is contained in:
@@ -17,7 +17,7 @@ Zielzustand: ein zentraler Observability-Stack fuer KalliLab CORE.
|
||||
|
||||
Die alten Pfade `ops/loki` und `ops/grafana-influxdb` wurden am 2026-05-26 aus dem aktiven Repo entfernt. Rollback erfolgt bei Bedarf ueber Git-Historie, nicht ueber parallel gepflegte Compose-Verzeichnisse.
|
||||
|
||||
Live-Stand 2026-05-25: die zehn `monitoring-*` Container laufen produktiv, die alten Container `grafana`, `influxdb3-core`, `loki` und `alloy` sind in Docker nicht mehr vorhanden. Uptime Kuma ist durch Blackbox Exporter, Prometheus-Alerts und das Dashboard `Homelab / Availability` abgeloest.
|
||||
Live-Stand 2026-06-01: die zehn `monitoring-*` Container laufen produktiv, die alten Container `grafana`, `influxdb3-core`, `loki` und `alloy` sind in Docker nicht mehr vorhanden. Uptime Kuma ist durch Blackbox Exporter, Prometheus-Alerts und das Dashboard `Homelab / Availability` abgeloest.
|
||||
|
||||
## Secrets
|
||||
|
||||
@@ -72,7 +72,7 @@ INFLUXDB_BIND_IP=192.168.178.58
|
||||
- Dozzle bleibt abgeloest: `Homelab / Containers + Logs` ersetzt Live-Logs und Error-Rate.
|
||||
- Glances erst stoppen, wenn `Homelab / Host Overview` und `Homelab / Containers + Logs` die Metriken CPU, RAM, Disk, Network, Container-CPU und Container-RAM vollstaendig und plausibel anzeigen.
|
||||
- Uptime Kuma ist entfernt; `Homelab / Availability`, Blackbox Exporter und Prometheus-Alerts sind der Zielzustand fuer HTTP-Verfuegbarkeit.
|
||||
- Dashboard-Zielbestand: `Homelab / Availability`, `Homelab / Containers + Logs`, `Homelab / Host Overview`, `Traefik Official Standalone Dashboard`.
|
||||
- Dashboard-Zielbestand: `Homelab / Availability`, `Homelab / Containers + Logs`, `Homelab / Host Overview`, `Homelab / Family Status`, `Traefik Official Standalone Dashboard`.
|
||||
|
||||
## Alerting
|
||||
|
||||
|
||||
@@ -0,0 +1,295 @@
|
||||
{
|
||||
"uid": "homelab-family-status",
|
||||
"title": "Homelab / Family Status",
|
||||
"tags": ["homelab", "family", "status"],
|
||||
"timezone": "browser",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Family Apps Up",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 0, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "sum(probe_success{job=\"blackbox-http\", instance=~\"https://(vault|immich|cloud|paperless|mealie|ntfy|glance)\\\\.kaleschke\\\\.info\"})"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": null},
|
||||
{"color": "yellow", "value": 5},
|
||||
{"color": "green", "value": 7}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Family Apps Down",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "count(probe_success{job=\"blackbox-http\", instance=~\"https://(vault|immich|cloud|paperless|mealie|ntfy|glance)\\\\.kaleschke\\\\.info\"}) - sum(probe_success{job=\"blackbox-http\", instance=~\"https://(vault|immich|cloud|paperless|mealie|ntfy|glance)\\\\.kaleschke\\\\.info\"})"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null},
|
||||
{"color": "red", "value": 1}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "Backup Age",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 12, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "(time() - homelab_borg_last_completed_timestamp_seconds) / 3600"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "h",
|
||||
"decimals": 1,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null},
|
||||
{"color": "yellow", "value": 24},
|
||||
{"color": "red", "value": 30}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "stat",
|
||||
"title": "TLS Days Left",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 18, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "min((probe_ssl_earliest_cert_expiry{job=\"blackbox-http\"} - time()) / 86400)"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "d",
|
||||
"decimals": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": null},
|
||||
{"color": "yellow", "value": 7},
|
||||
{"color": "green", "value": 21}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "Critical Containers Down",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 0, "y": 5},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "count(homelab_critical_container_running) - sum(homelab_critical_container_running)"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null},
|
||||
{"color": "red", "value": 1}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "stat",
|
||||
"title": "Image Drift",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 6, "y": 5},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "count(homelab_gitops_runtime_image_match) - sum(homelab_gitops_runtime_image_match)"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "red", "value": 3}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"type": "stat",
|
||||
"title": "Last Backup OK",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 12, "y": 5},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "homelab_borg_last_success"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bool",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": null},
|
||||
{"color": "green", "value": 1}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"type": "stat",
|
||||
"title": "Metrics Age",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 5, "w": 6, "x": 18, "y": 5},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "(time() - homelab_textfile_exporter_last_run_timestamp_seconds) / 60"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "m",
|
||||
"decimals": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": null},
|
||||
{"color": "yellow", "value": 60},
|
||||
{"color": "red", "value": 120}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false}}
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"type": "timeseries",
|
||||
"title": "Family App Availability",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "probe_success{job=\"blackbox-http\", instance=~\"https://(vault|immich|cloud|paperless|mealie|ntfy|glance)\\\\.kaleschke\\\\.info\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "bool", "min": 0, "max": 1}, "overrides": []},
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom"}}
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"type": "timeseries",
|
||||
"title": "Family App Response Time",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "probe_duration_seconds{job=\"blackbox-http\", instance=~\"https://(vault|immich|cloud|paperless|mealie|ntfy|glance)\\\\.kaleschke\\\\.info\"}",
|
||||
"legendFormat": "{{instance}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "s"}, "overrides": []},
|
||||
"options": {"legend": {"displayMode": "list", "placement": "bottom"}}
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"type": "table",
|
||||
"title": "Public Endpoint Status",
|
||||
"datasource": "Prometheus",
|
||||
"gridPos": {"h": 9, "w": 24, "x": 0, "y": 18},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "probe_http_status_code{job=\"blackbox-http\"}",
|
||||
"format": "table",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {"Time": true, "__name__": true, "job": true},
|
||||
"renameByName": {"Value": "status_code", "instance": "target"}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user