Files
homelab-infra/monitoring/docker-compose.yml
T
Micha 1a4929f9ef Pin monitoring stack images by digest
Reads live RepoDigests of each running monitoring container and
freezes the compose to the exact image manifest. Brings the
monitoring stack to the same digest-pin discipline as the
stateful tier-1 services. influxdb3-core was already pinned.

Affected: prometheus, alertmanager, alertmanager-ntfy-bridge,
blackbox-exporter, loki, promtail, grafana, node-exporter,
cadvisor (plus a second python:3.13-alpine for the bootstrap
dashboard importer).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 15:23:03 +02:00

367 lines
12 KiB
YAML

# Monitoring stack: Prometheus + Alertmanager (with an ntfy bridge), the
# blackbox/node/cAdvisor exporters, Loki + Promtail, Grafana and InfluxDB 3 Core.
#
# Every image is written as tag@sha256:<digest> so that a re-pull of a moved
# tag cannot silently change the manifest that is running.
services:
  # Metrics store. Config and alert rules are mounted read-only; the TSDB
  # lives in a named volume and keeps 30 days of data.
  prometheus:
    image: prom/prometheus:v3.7.3@sha256:49214755b6153f90a597adcbff0252cc61069f8ab69ce8411285cd4a560e8038
    container_name: monitoring-prometheus
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      - --web.enable-lifecycle
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
      - prometheus_data:/prometheus
    networks:
      - monitoring_net
    # `expose` only documents the port for other containers; nothing is
    # published on the host.
    expose:
      - "9090"
    security_opt:
      - no-new-privileges:true
    depends_on:
      - alertmanager
      - blackbox-exporter
      - node-exporter
      - cadvisor

  # Alert routing. State is kept in a named volume.
  alertmanager:
    image: prom/alertmanager:v0.28.1@sha256:27c475db5fb156cab31d5c18a4251ac7ed567746a2483ff264516437a39b15ba
    container_name: monitoring-alertmanager
    restart: unless-stopped
    command:
      - --config.file=/etc/alertmanager/alertmanager.yml
      - --storage.path=/alertmanager
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
    networks:
      - monitoring_net
    expose:
      - "9093"
    security_opt:
      - no-new-privileges:true

  # Runs the bind-mounted bridge.py on a stock Python image.
  # NOTE(review): presumably forwards Alertmanager webhooks to NTFY_URL;
  # bridge.py is not part of this file, confirm there.
  alertmanager-ntfy-bridge:
    image: python:3.13-alpine@sha256:420cd0bf0f3998275875e02ecd5808168cf0843cbb4d3c536432f729247b2acc
    container_name: monitoring-alertmanager-ntfy-bridge
    restart: unless-stopped
    # Explicit public resolvers instead of the Docker-provided defaults.
    dns:
      - 1.1.1.1
      - 8.8.8.8
    environment:
      NTFY_URL: https://ntfy.kaleschke.info/homelab-alerts
    command:
      - python
      - /app/bridge.py
    volumes:
      - ./alertmanager-ntfy-bridge/bridge.py:/app/bridge.py:ro
    networks:
      - monitoring_net
    expose:
      - "8080"
    security_opt:
      - no-new-privileges:true

  # Probe exporter; uses the same explicit public resolvers as the bridge.
  blackbox-exporter:
    image: prom/blackbox-exporter:v0.27.0@sha256:a50c4c0eda297baa1678cd4dc4712a67fdea713b832d43ce7fcc5f9bea05094d
    container_name: monitoring-blackbox-exporter
    restart: unless-stopped
    dns:
      - 1.1.1.1
      - 8.8.8.8
    command:
      - --config.file=/etc/blackbox_exporter/blackbox.yml
    volumes:
      - ./blackbox/blackbox.yml:/etc/blackbox_exporter/blackbox.yml:ro
    networks:
      - monitoring_net
    expose:
      - "9115"
    security_opt:
      - no-new-privileges:true

  # Log store. Data is kept in a named volume.
  loki:
    image: grafana/loki:3.7.2@sha256:191d4fdfb7264f16989f0a57f320872620a5a7c2ceeec6229212c4190ec49b86
    container_name: monitoring-loki
    restart: unless-stopped
    command:
      - -config.file=/etc/loki/loki-config.yml
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro
      - loki_data:/loki
    networks:
      - monitoring_net
    expose:
      - "3100"
    security_opt:
      - no-new-privileges:true

  # Log shipper. Gets the Docker socket and the container log directory
  # read-only; read positions persist in a named volume.
  promtail:
    image: grafana/promtail:3.6.10@sha256:2a0f5e3e160ee5d549c585f6cc4f4e1c566ff783324a424bd75bc16503fc660e
    container_name: monitoring-promtail
    restart: unless-stopped
    command:
      - -config.file=/etc/promtail/promtail-config.yml
    volumes:
      - ./promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
      - promtail_positions:/positions
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    networks:
      - monitoring_net
    security_opt:
      - no-new-privileges:true
    depends_on:
      - loki

  # Dashboards. Reachable through Traefik on frontend_net; sign-up and
  # anonymous access are disabled.
  grafana:
    image: grafana/grafana:12.4.3@sha256:2e986801428cd689c2358605289c90ab37d2b39e24808874971f54c99bcdc412
    container_name: monitoring-grafana
    restart: unless-stopped
    dns:
      - 1.1.1.1
      - 8.8.8.8
    environment:
      GF_SERVER_ROOT_URL: https://monitoring.kaleschke.info/
      GF_SECURITY_ADMIN_USER: admin
      # The `__FILE` suffix makes Grafana read the value from the secret file.
      GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/monitoring_grafana_admin_password
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_AUTH_ANONYMOUS_ENABLED: "false"
    # Wrapper entrypoint: export the InfluxDB token from its secret file as
    # GRAFANA_INFLUXDB_TOKEN, then hand over to the image's /run.sh.
    # `$$` is Compose's escape for a literal `$`, so the command substitution
    # runs inside the container rather than being interpolated by Compose.
    entrypoint:
      - /bin/sh
      - -c
      - |
        export GRAFANA_INFLUXDB_TOKEN="$$(cat /run/secrets/monitoring_grafana_influxdb_token)"
        exec /run.sh
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    networks:
      - monitoring_net
      - frontend_net
    secrets:
      - monitoring_grafana_admin_password
      - monitoring_grafana_influxdb_token
    expose:
      - "3000"
    security_opt:
      - no-new-privileges:true
    depends_on:
      - prometheus
      - loki
      - influxdb3-core
    labels:
      - traefik.enable=true
      - traefik.docker.network=frontend_net
      - traefik.http.routers.monitoring-grafana.rule=Host(`monitoring.kaleschke.info`)
      - traefik.http.routers.monitoring-grafana.entrypoints=websecure
      - traefik.http.routers.monitoring-grafana.tls=true
      - traefik.http.routers.monitoring-grafana.tls.certresolver=le
      - traefik.http.routers.monitoring-grafana.middlewares=authelia@file,secure-headers@file
      - traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000

  # One-shot job, started only with the `bootstrap` profile. It waits for
  # Grafana's /api/health (up to 60 tries, 2 s apart), downloads each listed
  # dashboard's latest revision from grafana.com and imports it with
  # overwrite enabled. Downloads and imports are retried up to 6 times.
  grafana-dashboard-importer:
    # Pinned to the same manifest as alertmanager-ntfy-bridge, which uses the
    # same python:3.13-alpine tag; previously this was the only floating tag
    # in the file.
    # NOTE(review): confirm the digest against the host with
    # `docker image inspect --format '{{json .RepoDigests}}' python:3.13-alpine`.
    image: python:3.13-alpine@sha256:420cd0bf0f3998275875e02ecd5808168cf0843cbb4d3c536432f729247b2acc
    container_name: monitoring-grafana-dashboard-importer
    restart: "no"
    profiles:
      - bootstrap
    dns:
      - 1.1.1.1
      - 8.8.8.8
    networks:
      - monitoring_net
      - frontend_net
    security_opt:
      - no-new-privileges:true
    depends_on:
      - grafana
    secrets:
      - monitoring_grafana_admin_password
    # The script is fed to `python -` through a quoted heredoc, so the shell
    # performs no expansion inside it. The closing `PY` must stay at the
    # block scalar's base indentation to terminate the heredoc.
    command:
      - /bin/sh
      - -c
      - |
        python - <<'PY'
        import base64
        import json
        import time
        import urllib.error
        import urllib.request

        grafana_url = "http://grafana:3000"
        with open("/run/secrets/monitoring_grafana_admin_password", encoding="utf-8") as secret:
            password = secret.read().strip()
        auth = base64.b64encode(f"admin:{password}".encode()).decode()
        headers = {
            "Authorization": f"Basic {auth}",
            "Content-Type": "application/json",
        }

        def request(path, payload=None, timeout=20):
            data = None if payload is None else json.dumps(payload).encode()
            req = urllib.request.Request(f"{grafana_url}{path}", data=data, headers=headers)
            if payload is not None:
                req.method = "POST"
            with urllib.request.urlopen(req, timeout=timeout) as response:
                body = response.read()
            return json.loads(body.decode() or "{}") if body else {}

        def import_dashboard(payload, dashboard_id):
            for attempt in range(1, 7):
                try:
                    return request("/api/dashboards/import", payload)
                except urllib.error.HTTPError as exc:
                    body = exc.read().decode(errors="replace")[:300]
                    if attempt == 6:
                        raise RuntimeError(f"Dashboard {dashboard_id} import failed: {exc.code} {body}") from exc
                    print(f"Dashboard {dashboard_id} import attempt {attempt} failed: HTTP {exc.code} {body}")
                    time.sleep(5)
                except Exception as exc:
                    if attempt == 6:
                        raise
                    print(f"Dashboard {dashboard_id} import attempt {attempt} failed: {exc}")
                    time.sleep(5)

        for _ in range(60):
            try:
                request("/api/health", timeout=5)
                break
            except Exception:
                time.sleep(2)
        else:
            raise SystemExit("Grafana did not become ready in time")

        dashboards = [
            (17346, "Prometheus"),
        ]

        def fetch_dashboard(dashboard_id):
            url = f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download"
            for attempt in range(1, 7):
                try:
                    with urllib.request.urlopen(url, timeout=30) as response:
                        return json.loads(response.read().decode())
                except Exception as exc:
                    if attempt == 6:
                        raise
                    print(f"Dashboard {dashboard_id} download attempt {attempt} failed: {exc}")
                    time.sleep(5)

        for dashboard_id, default_datasource in dashboards:
            dashboard = fetch_dashboard(dashboard_id)
            inputs = []
            for item in dashboard.get("__inputs", []):
                plugin_id = item.get("pluginId", "").lower()
                value = "Loki" if plugin_id == "loki" or default_datasource == "Loki" else "Prometheus"
                inputs.append({
                    "name": item.get("name"),
                    "type": item.get("type", "datasource"),
                    "pluginId": item.get("pluginId", "prometheus"),
                    "value": value,
                })
            import_dashboard({
                "dashboard": dashboard,
                "overwrite": True,
                "inputs": inputs,
            }, dashboard_id)
            print(f"Imported Grafana dashboard {dashboard_id}")
        PY
        echo "Dashboard import complete."

  # Host metrics. The host's /proc, /sys and / are mounted read-only and the
  # collector is pointed at them; the textfile collector reads a host
  # directory mounted at /textfile.
  node-exporter:
    image: prom/node-exporter:v1.9.1@sha256:d00a542e409ee618a4edc67da14dd48c5da66726bbd5537ab2af9c1dfc442c8a
    container_name: monitoring-node-exporter
    restart: unless-stopped
    command:
      - --path.procfs=/host/proc
      - --path.sysfs=/host/sys
      - --path.rootfs=/rootfs
      - --collector.textfile.directory=/textfile
      - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/containers/storage/.+)($|/)
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /mnt/user/services/posture-check/textfile:/textfile:ro
    networks:
      - monitoring_net
    expose:
      - "9100"
    security_opt:
      - no-new-privileges:true

  # Container metrics, restricted to Docker containers, without storing
  # container labels. All host paths are mounted read-only.
  cadvisor:
    image: ghcr.io/google/cadvisor:v0.53.0@sha256:c3770bd6fc6c6a9cb2b47143e6b3cc3fdd9d20a8453dffbb7e09a145e7e0c4e4
    container_name: monitoring-cadvisor
    restart: unless-stopped
    command:
      - --docker_only=true
      - --housekeeping_interval=30s
      - --store_container_labels=false
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    networks:
      - monitoring_net
    expose:
      - "8080"
    security_opt:
      - no-new-privileges:true

  # InfluxDB 3 Core with a file-based object store on host bind mounts.
  # This is the only service that publishes a host port.
  influxdb3-core:
    image: influxdb:3.9.1-core@sha256:1d58c8b9ac90153ae3a020ede2810c8284933dda50ac71e7573389ab6f012128
    container_name: monitoring-influxdb3-core
    # Runs as uid 0.
    # NOTE(review): presumably needed for ownership of the bind-mounted data
    # directories; confirm before changing.
    user: "0"
    restart: unless-stopped
    # Bound to loopback unless INFLUXDB_BIND_IP is set in the environment.
    ports:
      - "${INFLUXDB_BIND_IP:-127.0.0.1}:8181:8181"
    command:
      - influxdb3
      - serve
      - --node-id=kallilabcore
      - --object-store=file
      - --data-dir=/var/lib/influxdb3/data
      - --plugin-dir=/var/lib/influxdb3/plugins
      - --admin-token-file=/run/secrets/influxdb3_admin_token
    volumes:
      - /mnt/user/appdata/influxdb3/data:/var/lib/influxdb3/data
      - /mnt/user/appdata/influxdb3/plugins:/var/lib/influxdb3/plugins
    secrets:
      - influxdb3_admin_token
    networks:
      - monitoring_net
      - monitoring_influx_lan
    security_opt:
      - no-new-privileges:true

# Networks referenced by the services in this file.
networks:
  # Stack-internal bridge. The explicit `name` gives it a fixed name instead
  # of one prefixed with the Compose project name.
  monitoring_net:
    name: monitoring_net
    driver: bridge

  # Additional bridge; in this file only influxdb3-core attaches to it.
  monitoring_influx_lan:
    driver: bridge

  # Created and managed outside this file; it must already exist.
  frontend_net:
    external: true

# Named volumes with default driver settings. Written as explicit empty
# mappings rather than bare keys, which YAML would read as null.
volumes:
  prometheus_data: {}
  alertmanager_data: {}
  loki_data: {}
  promtail_positions: {}
  grafana_data: {}

# File-backed secrets. Each one is read from the host path below and appears
# inside the consuming container as /run/secrets/<secret name>.
secrets:
  # Grafana admin password (used by grafana and grafana-dashboard-importer).
  monitoring_grafana_admin_password:
    file: /mnt/user/appdata/secrets/monitoring_grafana_admin_password.txt

  # Token that the grafana entrypoint exports as GRAFANA_INFLUXDB_TOKEN.
  monitoring_grafana_influxdb_token:
    file: /mnt/user/appdata/secrets/monitoring_grafana_influxdb_token.txt

  # Passed to influxdb3-core via --admin-token-file.
  influxdb3_admin_token:
    file: /mnt/user/appdata/secrets/influxdb3_admin_token.json