# Listing metadata from the file viewer: YAML, 259 lines, 8.3 KiB.
services:
  # Prometheus server. Its config file is mounted read-only and the TSDB lives
  # in the prometheus_data volume with a 30 day retention window.
  prometheus:
    image: prom/prometheus:v3.7.3
    container_name: monitoring-prometheus
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      # Enables the HTTP reload/quit lifecycle endpoints.
      - --web.enable-lifecycle
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    networks:
      - monitoring_net
    # expose (not ports): reachable from attached networks only, never
    # published on the host.
    expose:
      - "9090"
    security_opt:
      - no-new-privileges:true
    depends_on:
      - node-exporter
      - cadvisor

  # Loki. Config is mounted read-only; data is kept in the loki_data volume.
  loki:
    image: grafana/loki:3.7.2
    container_name: monitoring-loki
    restart: unless-stopped
    command:
      - -config.file=/etc/loki/loki-config.yml
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro
      - loki_data:/loki
    networks:
      - monitoring_net
    expose:
      - "3100"
    security_opt:
      - no-new-privileges:true

  # Promtail. Gets read-only access to the Docker socket and the container log
  # directory; read positions are persisted in promtail_positions.
  # NOTE(review): the push target is defined in promtail-config.yml, which is
  # not part of this file - presumably the loki service above; confirm.
  promtail:
    image: grafana/promtail:3.6.10
    container_name: monitoring-promtail
    restart: unless-stopped
    command:
      - -config.file=/etc/promtail/promtail-config.yml
    volumes:
      - ./promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
      - promtail_positions:/positions
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    networks:
      - monitoring_net
    security_opt:
      - no-new-privileges:true
    depends_on:
      - loki

  # Grafana. Attached to frontend_net as well so Traefik can route
  # monitoring.kaleschke.info to port 3000 (see labels).
  grafana:
    image: grafana/grafana:12.4.3
    container_name: monitoring-grafana
    restart: unless-stopped
    environment:
      GF_SERVER_ROOT_URL: https://monitoring.kaleschke.info/
      GF_SECURITY_ADMIN_USER: admin
      # Required: ":?" makes Compose abort with this message instead of
      # silently substituting an empty password when the variable is unset.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_AUTH_ANONYMOUS_ENABLED: "false"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    networks:
      - monitoring_net
      - frontend_net
    expose:
      - "3000"
    security_opt:
      - no-new-privileges:true
    depends_on:
      - prometheus
      - loki
    labels:
      - traefik.enable=true
      - traefik.docker.network=frontend_net
      - traefik.http.routers.monitoring-grafana.rule=Host(`monitoring.kaleschke.info`)
      - traefik.http.routers.monitoring-grafana.entrypoints=websecure
      - traefik.http.routers.monitoring-grafana.tls=true
      - traefik.http.routers.monitoring-grafana.tls.certresolver=le
      - traefik.http.routers.monitoring-grafana.middlewares=authelia@docker,secure-headers@file
      - traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000
      # Docker-provider Authelia middleware requested for this stack.
      - traefik.http.middlewares.authelia.forwardauth.address=http://authelia:9091/api/authz/forward-auth
      - traefik.http.middlewares.authelia.forwardauth.trustForwardHeader=true
      - traefik.http.middlewares.authelia.forwardauth.authResponseHeaders=Remote-User,Remote-Groups,Remote-Name,Remote-Email

  # One-shot job (restart: "no"): waits for Grafana, downloads community
  # dashboards from grafana.com and imports them through Grafana's HTTP API.
  grafana-dashboard-importer:
    image: python:3.13-alpine
    container_name: monitoring-grafana-dashboard-importer
    restart: "no"
    environment:
      # Same required-variable guard as on the grafana service.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
    # NOTE(review): explicit public resolvers, presumably so the grafana.com
    # download does not depend on the host's DNS setup - confirm.
    dns:
      - 1.1.1.1
      - 8.8.8.8
    networks:
      - monitoring_net
      - frontend_net
    security_opt:
      - no-new-privileges:true
    depends_on:
      - grafana
    # The script is fed to python through a quoted heredoc, so the shell does
    # not expand anything in it. Compose still interpolates dollar signs in
    # this string: write a literal one as a doubled dollar sign if ever needed.
    command:
      - /bin/sh
      - -c
      - |
        python - <<'PY'
        import base64
        import json
        import os
        import time
        import urllib.error
        import urllib.request

        grafana_url = "http://grafana:3000"
        password = os.environ["GF_SECURITY_ADMIN_PASSWORD"]
        # HTTP Basic credentials for the built-in admin user.
        auth = base64.b64encode(f"admin:{password}".encode()).decode()
        headers = {
            "Authorization": f"Basic {auth}",
            "Content-Type": "application/json",
        }

        def request(path, payload=None, timeout=20):
            # GET when payload is None, otherwise POST the payload as JSON.
            # Returns the decoded JSON body, or {} for an empty response.
            data = None if payload is None else json.dumps(payload).encode()
            req = urllib.request.Request(f"{grafana_url}{path}", data=data, headers=headers)
            if payload is not None:
                req.method = "POST"
            with urllib.request.urlopen(req, timeout=timeout) as response:
                body = response.read()
                return json.loads(body.decode() or "{}") if body else {}

        def import_dashboard(payload, dashboard_id):
            # Up to 6 attempts, 5 seconds apart; the last failure is re-raised.
            for attempt in range(1, 7):
                try:
                    return request("/api/dashboards/import", payload)
                except urllib.error.HTTPError as exc:
                    body = exc.read().decode(errors="replace")[:300]
                    if attempt == 6:
                        raise RuntimeError(f"Dashboard {dashboard_id} import failed: {exc.code} {body}") from exc
                    print(f"Dashboard {dashboard_id} import attempt {attempt} failed: HTTP {exc.code} {body}")
                    time.sleep(5)
                except Exception as exc:
                    if attempt == 6:
                        raise
                    print(f"Dashboard {dashboard_id} import attempt {attempt} failed: {exc}")
                    time.sleep(5)

        # Poll the health endpoint up to 60 times, 2 seconds apart; the
        # for-else branch runs only when no attempt succeeded.
        for _ in range(60):
            try:
                request("/api/health", timeout=5)
                break
            except Exception:
                time.sleep(2)
        else:
            raise SystemExit("Grafana did not become ready in time")

        # (grafana.com dashboard id, datasource name used for its inputs)
        dashboards = [
            (1860, "Prometheus"),
            (14282, "Prometheus"),
            (17346, "Prometheus"),
            (13639, "Loki"),
        ]

        def fetch_dashboard(dashboard_id):
            # Download the latest revision; up to 6 attempts, 5 seconds apart.
            url = f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download"
            for attempt in range(1, 7):
                try:
                    with urllib.request.urlopen(url, timeout=30) as response:
                        return json.loads(response.read().decode())
                except Exception as exc:
                    if attempt == 6:
                        raise
                    print(f"Dashboard {dashboard_id} download attempt {attempt} failed: {exc}")
                    time.sleep(5)

        for dashboard_id, default_datasource in dashboards:
            dashboard = fetch_dashboard(dashboard_id)

            # Map every declared dashboard input onto a datasource name.
            inputs = []
            for item in dashboard.get("__inputs", []):
                plugin_id = item.get("pluginId", "").lower()
                value = "Loki" if plugin_id == "loki" or default_datasource == "Loki" else "Prometheus"
                inputs.append({
                    "name": item.get("name"),
                    "type": item.get("type", "datasource"),
                    "pluginId": item.get("pluginId", "prometheus"),
                    "value": value,
                })

            import_dashboard({
                "dashboard": dashboard,
                "overwrite": True,
                "inputs": inputs,
            }, dashboard_id)
            print(f"Imported Grafana dashboard {dashboard_id}")
        PY

  # node-exporter. The host's /proc, /sys and / are mounted read-only and
  # passed to the exporter through the --path.* flags.
  node-exporter:
    image: prom/node-exporter:v1.9.1
    container_name: monitoring-node-exporter
    restart: unless-stopped
    command:
      - --path.procfs=/host/proc
      - --path.sysfs=/host/sys
      - --path.rootfs=/rootfs
      # "$$" is Compose's escape for a literal "$" (the regex end anchor); a
      # bare "$" is treated as the start of a variable interpolation.
      - '--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/containers/storage/.+)($$|/)'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring_net
    expose:
      - "9100"
    security_opt:
      - no-new-privileges:true

  # cAdvisor. Host paths are mounted read-only; restricted to Docker
  # containers with a 30s housekeeping interval and container labels not stored.
  cadvisor:
    image: ghcr.io/google/cadvisor:v0.53.0
    container_name: monitoring-cadvisor
    restart: unless-stopped
    command:
      - --docker_only=true
      - --housekeeping_interval=30s
      - --store_container_labels=false
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    networks:
      - monitoring_net
    expose:
      - "8080"
    security_opt:
      - no-new-privileges:true

networks:
  # Bridge network created by this file, with a fixed name instead of the
  # project-prefixed default.
  monitoring_net:
    name: monitoring_net
    driver: bridge
  # Not created here: must already exist before this stack is started.
  frontend_net:
    external: true

# Named volumes, all using the default driver and options.
volumes:
  prometheus_data:
  loki_data:
  promtail_positions:
  grafana_data: