ops: wire monitoring stack to traefik metrics
This commit is contained in:
@@ -13,9 +13,6 @@ services:
|
||||
- prometheus_data:/prometheus
|
||||
networks:
|
||||
- monitoring_net
|
||||
extra_hosts:
|
||||
# Lets Prometheus scrape host-published Traefik metrics on Unraid.
|
||||
- host.docker.internal:host-gateway
|
||||
expose:
|
||||
- "9090"
|
||||
security_opt:
|
||||
@@ -101,6 +98,9 @@ services:
|
||||
restart: "no"
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD}
|
||||
dns:
|
||||
- 1.1.1.1
|
||||
- 8.8.8.8
|
||||
networks:
|
||||
- monitoring_net
|
||||
- frontend_net
|
||||
@@ -137,6 +137,22 @@ services:
|
||||
body = response.read()
|
||||
return json.loads(body.decode() or "{}") if body else {}
|
||||
|
||||
def import_dashboard(payload, dashboard_id):
    """Send *payload* to the dashboard import endpoint, retrying on failure.

    The payload is posted through ``request("/api/dashboards/import", ...)``.
    Up to six attempts are made, with a five-second pause after each failed
    attempt except the last.

    Args:
        payload: Body passed unchanged to ``request``.
        dashboard_id: Identifier used only in log and error messages.

    Returns:
        Whatever ``request`` returns for the first successful attempt.

    Raises:
        RuntimeError: If the final attempt fails with an HTTP error; the
            message carries the status code and up to 300 characters of the
            response body.
        Exception: Any non-HTTP error from the final attempt is re-raised
            unchanged.
    """
    max_attempts = 6
    retry_delay_seconds = 5

    for attempt in range(1, max_attempts + 1):
        out_of_retries = attempt == max_attempts
        try:
            return request("/api/dashboards/import", payload)
        except urllib.error.HTTPError as http_err:
            # Keep only a short excerpt of the error body for the log line.
            snippet = http_err.read().decode(errors="replace")[:300]
            if out_of_retries:
                raise RuntimeError(
                    f"Dashboard {dashboard_id} import failed: {http_err.code} {snippet}"
                ) from http_err
            print(
                f"Dashboard {dashboard_id} import attempt {attempt} failed: HTTP {http_err.code} {snippet}"
            )
        except Exception as err:
            if out_of_retries:
                raise
            print(f"Dashboard {dashboard_id} import attempt {attempt} failed: {err}")
        # Only reached after a failed, non-final attempt (success returns above).
        time.sleep(retry_delay_seconds)
|
||||
|
||||
for _ in range(60):
|
||||
try:
|
||||
request("/api/health", timeout=5)
|
||||
@@ -153,12 +169,20 @@ services:
|
||||
(13639, "Loki"),
|
||||
]
|
||||
|
||||
def fetch_dashboard(dashboard_id):
    """Download the latest revision of a dashboard from grafana.com.

    Up to six attempts are made, each with a 30-second timeout, and a
    five-second pause follows every failed attempt except the last.

    Args:
        dashboard_id: Dashboard identifier interpolated into the download URL.

    Returns:
        The response body parsed as JSON.

    Raises:
        Exception: Whatever the final attempt raised (network error, decode
            error or invalid JSON), re-raised unchanged.
    """
    max_attempts = 6
    retry_delay_seconds = 5
    download_url = f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download"

    for attempt in range(1, max_attempts + 1):
        try:
            with urllib.request.urlopen(download_url, timeout=30) as reply:
                raw_body = reply.read()
            # Parsing stays inside the try so a malformed body is retried too.
            return json.loads(raw_body.decode())
        except Exception as err:
            if attempt == max_attempts:
                raise
            print(f"Dashboard {dashboard_id} download attempt {attempt} failed: {err}")
        # Only reached after a failed, non-final attempt (success returns above).
        time.sleep(retry_delay_seconds)
|
||||
|
||||
for dashboard_id, default_datasource in dashboards:
|
||||
with urllib.request.urlopen(
|
||||
f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download",
|
||||
timeout=30,
|
||||
) as response:
|
||||
dashboard = json.loads(response.read().decode())
|
||||
dashboard = fetch_dashboard(dashboard_id)
|
||||
|
||||
inputs = []
|
||||
for item in dashboard.get("__inputs", []):
|
||||
@@ -171,11 +195,11 @@ services:
|
||||
"value": value,
|
||||
})
|
||||
|
||||
request("/api/dashboards/import", {
|
||||
import_dashboard({
|
||||
"dashboard": dashboard,
|
||||
"overwrite": True,
|
||||
"inputs": inputs,
|
||||
})
|
||||
}, dashboard_id)
|
||||
print(f"Imported Grafana dashboard {dashboard_id}")
|
||||
PY
|
||||
|
||||
@@ -191,7 +215,7 @@ services:
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro,rslave
|
||||
- /:/rootfs:ro
|
||||
networks:
|
||||
- monitoring_net
|
||||
expose:
|
||||
|
||||
@@ -23,6 +23,6 @@ scrape_configs:
|
||||
- job_name: traefik
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
# Expects Traefik metrics to be reachable on the Unraid host at port 8082.
|
||||
# Traefik exposes Prometheus metrics internally on its metrics entrypoint.
|
||||
- targets:
|
||||
- host.docker.internal:8082
|
||||
- traefik:8082
|
||||
|
||||
@@ -19,6 +19,12 @@ services:
|
||||
- --entrypoints.web.http.redirections.entrypoint.permanent=true
|
||||
- --entrypoints.websecure.address=:443
|
||||
- --entrypoints.websecure.http.tls=true
|
||||
- --entrypoints.metrics.address=:8082
|
||||
- --metrics.prometheus=true
|
||||
- --metrics.prometheus.entrypoint=metrics
|
||||
- --metrics.prometheus.addentrypointslabels=true
|
||||
- --metrics.prometheus.addrouterslabels=true
|
||||
- --metrics.prometheus.addserviceslabels=true
|
||||
- --certificatesresolvers.le.acme.email=mi.kaleschke@gmx.de
|
||||
- --certificatesresolvers.le.acme.storage=/letsencrypt/acme.json
|
||||
- --certificatesresolvers.le.acme.dnschallenge=true
|
||||
@@ -39,6 +45,7 @@ services:
|
||||
networks:
|
||||
- frontend_net
|
||||
- backend_net
|
||||
- monitoring_net
|
||||
labels:
|
||||
- traefik.enable=true
|
||||
- traefik.docker.network=frontend_net
|
||||
@@ -58,6 +65,8 @@ networks:
|
||||
external: true
|
||||
backend_net:
|
||||
external: true
|
||||
monitoring_net:
|
||||
external: true
|
||||
|
||||
secrets:
|
||||
cloudflare_dns_api_token:
|
||||
|
||||
Reference in New Issue
Block a user