ops: wire monitoring stack to traefik metrics

This commit is contained in:
2026-05-16 22:10:43 +02:00
parent 58eb53a6a8
commit 6e28ea94d2
3 changed files with 46 additions and 13 deletions
+35 -11
View File
@@ -13,9 +13,6 @@ services:
- prometheus_data:/prometheus
networks:
- monitoring_net
extra_hosts:
# Lets Prometheus scrape host-published Traefik metrics on Unraid.
- host.docker.internal:host-gateway
expose:
- "9090"
security_opt:
@@ -101,6 +98,9 @@ services:
restart: "no"
environment:
GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD}
dns:
- 1.1.1.1
- 8.8.8.8
networks:
- monitoring_net
- frontend_net
@@ -137,6 +137,22 @@ services:
body = response.read()
return json.loads(body.decode() or "{}") if body else {}
def import_dashboard(payload, dashboard_id, *, attempts=6, delay=5):
    """Submit a dashboard import to Grafana, retrying when the call fails.

    Sends ``payload`` to ``/api/dashboards/import`` through the file's
    ``request`` helper and returns whatever that helper returns.

    Args:
        payload: Import body passed unchanged to ``request``.
        dashboard_id: Identifier used only in log and error messages.
        attempts: Total number of tries before giving up (default 6).
        delay: Seconds to sleep between failed tries (default 5).

    Raises:
        ValueError: If ``attempts`` is smaller than 1.
        RuntimeError: If the final try fails with ``urllib.error.HTTPError``;
            the message carries the status code and the first 300 characters
            of the response body, and the HTTPError is chained as the cause.
        Exception: Any other error raised on the final try is re-raised as-is.
    """
    if attempts < 1:
        # Without this guard the loop would not run and None would be returned silently.
        raise ValueError("attempts must be at least 1")

    for attempt in range(1, attempts + 1):
        is_last = attempt == attempts
        try:
            return request("/api/dashboards/import", payload)
        except urllib.error.HTTPError as exc:
            # Keep only a short excerpt of the response body so logs stay readable.
            body = exc.read().decode(errors="replace")[:300]
            if is_last:
                raise RuntimeError(f"Dashboard {dashboard_id} import failed: {exc.code} {body}") from exc
            print(f"Dashboard {dashboard_id} import attempt {attempt} failed: HTTP {exc.code} {body}")
        except Exception as exc:
            if is_last:
                raise
            print(f"Dashboard {dashboard_id} import attempt {attempt} failed: {exc}")
        # Reached only after a failed, non-final attempt.
        time.sleep(delay)
for _ in range(60):
try:
request("/api/health", timeout=5)
@@ -153,12 +169,20 @@ services:
(13639, "Loki"),
]
def fetch_dashboard(dashboard_id, *, attempts=6, delay=5, timeout=30):
    """Download the latest revision of a grafana.com dashboard and parse it.

    Args:
        dashboard_id: Dashboard identifier interpolated into the grafana.com
            download URL.
        attempts: Total number of tries before giving up (default 6).
        delay: Seconds to sleep between failed tries (default 5).
        timeout: Per-request timeout in seconds passed to ``urlopen``
            (default 30).

    Returns:
        The decoded JSON document returned by the download endpoint.

    Raises:
        ValueError: If ``attempts`` is smaller than 1.
        Exception: Whatever error the final try raised (network failure,
            HTTP error, or JSON decoding error) is re-raised unchanged.
    """
    if attempts < 1:
        # Without this guard the loop would not run and None would be returned silently.
        raise ValueError("attempts must be at least 1")

    url = f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download"
    for attempt in range(1, attempts + 1):
        try:
            with urllib.request.urlopen(url, timeout=timeout) as response:
                return json.loads(response.read().decode())
        except Exception as exc:
            # Any failure (including a malformed body) is retried until the budget runs out.
            if attempt == attempts:
                raise
            print(f"Dashboard {dashboard_id} download attempt {attempt} failed: {exc}")
            time.sleep(delay)
for dashboard_id, default_datasource in dashboards:
with urllib.request.urlopen(
f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download",
timeout=30,
) as response:
dashboard = json.loads(response.read().decode())
dashboard = fetch_dashboard(dashboard_id)
inputs = []
for item in dashboard.get("__inputs", []):
@@ -171,11 +195,11 @@ services:
"value": value,
})
request("/api/dashboards/import", {
import_dashboard({
"dashboard": dashboard,
"overwrite": True,
"inputs": inputs,
})
}, dashboard_id)
print(f"Imported Grafana dashboard {dashboard_id}")
PY
@@ -191,7 +215,7 @@ services:
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro,rslave
- /:/rootfs:ro
networks:
- monitoring_net
expose:
+2 -2
View File
@@ -23,6 +23,6 @@ scrape_configs:
- job_name: traefik
metrics_path: /metrics
static_configs:
# Expects Traefik metrics to be reachable on the Unraid host at port 8082.
# Traefik exposes Prometheus metrics internally on its metrics entrypoint.
- targets:
- host.docker.internal:8082
- traefik:8082
+9
View File
@@ -19,6 +19,12 @@ services:
- --entrypoints.web.http.redirections.entrypoint.permanent=true
- --entrypoints.websecure.address=:443
- --entrypoints.websecure.http.tls=true
- --entrypoints.metrics.address=:8082
- --metrics.prometheus=true
- --metrics.prometheus.entrypoint=metrics
- --metrics.prometheus.addentrypointslabels=true
- --metrics.prometheus.addrouterslabels=true
- --metrics.prometheus.addserviceslabels=true
- --certificatesresolvers.le.acme.email=mi.kaleschke@gmx.de
- --certificatesresolvers.le.acme.storage=/letsencrypt/acme.json
- --certificatesresolvers.le.acme.dnschallenge=true
@@ -39,6 +45,7 @@ services:
networks:
- frontend_net
- backend_net
- monitoring_net
labels:
- traefik.enable=true
- traefik.docker.network=frontend_net
@@ -58,6 +65,8 @@ networks:
external: true
backend_net:
external: true
monitoring_net:
external: true
secrets:
cloudflare_dns_api_token: