diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index e5d129d..6f6e8d1 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -13,9 +13,6 @@ services: - prometheus_data:/prometheus networks: - monitoring_net - extra_hosts: - # Lets Prometheus scrape host-published Traefik metrics on Unraid. - - host.docker.internal:host-gateway expose: - "9090" security_opt: @@ -101,6 +98,9 @@ services: restart: "no" environment: GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} + dns: + - 1.1.1.1 + - 8.8.8.8 networks: - monitoring_net - frontend_net @@ -137,6 +137,22 @@ services: body = response.read() return json.loads(body.decode() or "{}") if body else {} + def import_dashboard(payload, dashboard_id): + for attempt in range(1, 7): + try: + return request("/api/dashboards/import", payload) + except urllib.error.HTTPError as exc: + body = exc.read().decode(errors="replace")[:300] + if attempt == 6: + raise RuntimeError(f"Dashboard {dashboard_id} import failed: {exc.code} {body}") from exc + print(f"Dashboard {dashboard_id} import attempt {attempt} failed: HTTP {exc.code} {body}") + time.sleep(5) + except Exception as exc: + if attempt == 6: + raise + print(f"Dashboard {dashboard_id} import attempt {attempt} failed: {exc}") + time.sleep(5) + for _ in range(60): try: request("/api/health", timeout=5) @@ -153,12 +169,20 @@ services: (13639, "Loki"), ] + def fetch_dashboard(dashboard_id): + url = f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download" + for attempt in range(1, 7): + try: + with urllib.request.urlopen(url, timeout=30) as response: + return json.loads(response.read().decode()) + except Exception as exc: + if attempt == 6: + raise + print(f"Dashboard {dashboard_id} download attempt {attempt} failed: {exc}") + time.sleep(5) + for dashboard_id, default_datasource in dashboards: - with urllib.request.urlopen( - f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download", - timeout=30, - ) as response: - dashboard = json.loads(response.read().decode()) + dashboard = fetch_dashboard(dashboard_id) inputs = [] for item in dashboard.get("__inputs", []): @@ -171,11 +195,11 @@ services: "value": value, }) - request("/api/dashboards/import", { + import_dashboard({ "dashboard": dashboard, "overwrite": True, "inputs": inputs, - }) + }, dashboard_id) print(f"Imported Grafana dashboard {dashboard_id}") PY @@ -191,7 +215,7 @@ services: volumes: - /proc:/host/proc:ro - /sys:/host/sys:ro - - /:/rootfs:ro,rslave + - /:/rootfs:ro networks: - monitoring_net expose: diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index f3f3ed4..5a533e9 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -23,6 +23,6 @@ scrape_configs: - job_name: traefik metrics_path: /metrics static_configs: - # Expects Traefik metrics to be reachable on the Unraid host at port 8082. + # Traefik exposes Prometheus metrics internally on its metrics entrypoint. - targets: - - host.docker.internal:8082 + - traefik:8082 diff --git a/traefik/docker-compose.yml b/traefik/docker-compose.yml index 3488b3c..4e6b8e0 100644 --- a/traefik/docker-compose.yml +++ b/traefik/docker-compose.yml @@ -19,6 +19,12 @@ services: - --entrypoints.web.http.redirections.entrypoint.permanent=true - --entrypoints.websecure.address=:443 - --entrypoints.websecure.http.tls=true + - --entrypoints.metrics.address=:8082 + - --metrics.prometheus=true + - --metrics.prometheus.entrypoint=metrics + - --metrics.prometheus.addentrypointslabels=true + - --metrics.prometheus.addrouterslabels=true + - --metrics.prometheus.addserviceslabels=true - --certificatesresolvers.le.acme.email=mi.kaleschke@gmx.de - --certificatesresolvers.le.acme.storage=/letsencrypt/acme.json - --certificatesresolvers.le.acme.dnschallenge=true @@ -39,6 +45,7 @@ services: networks: - frontend_net - backend_net + - monitoring_net labels: - traefik.enable=true - traefik.docker.network=frontend_net @@ -58,6 +65,8 @@ networks: external: true backend_net: external: true + monitoring_net: + external: true secrets: cloudflare_dns_api_token: