ops: add monitoring compose stack

This commit is contained in:
2026-05-16 21:59:20 +02:00
parent d345d770c2
commit 58eb53a6a8
6 changed files with 375 additions and 0 deletions
+234
View File
@@ -0,0 +1,234 @@
# Monitoring stack: Prometheus + Loki/Promtail + Grafana, with node-exporter
# and cAdvisor as metric sources. Nothing publishes host ports; Grafana is
# reached through Traefik via the external frontend_net network.
services:
  prometheus:
    image: prom/prometheus:v3.7.3
    container_name: monitoring-prometheus
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=30d
      - --web.enable-lifecycle
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    networks:
      - monitoring_net
    extra_hosts:
      # Lets Prometheus scrape host-published Traefik metrics on Unraid.
      - host.docker.internal:host-gateway
    expose:
      - "9090"
    security_opt:
      - no-new-privileges:true
    depends_on:
      - node-exporter
      - cadvisor

  loki:
    image: grafana/loki:3.7.2
    container_name: monitoring-loki
    restart: unless-stopped
    command:
      - -config.file=/etc/loki/loki-config.yml
    volumes:
      - ./loki/loki-config.yml:/etc/loki/loki-config.yml:ro
      - loki_data:/loki
    networks:
      - monitoring_net
    expose:
      - "3100"
    security_opt:
      - no-new-privileges:true

  promtail:
    image: grafana/promtail:3.6.10
    container_name: monitoring-promtail
    restart: unless-stopped
    command:
      - -config.file=/etc/promtail/promtail-config.yml
    volumes:
      - ./promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
      - promtail_positions:/positions
      # Read-only Docker socket for container discovery.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    networks:
      - monitoring_net
    security_opt:
      - no-new-privileges:true
    depends_on:
      - loki

  grafana:
    image: grafana/grafana:12.4.3
    container_name: monitoring-grafana
    restart: unless-stopped
    environment:
      GF_SERVER_ROOT_URL: https://monitoring.kaleschke.info/
      GF_SECURITY_ADMIN_USER: admin
      # ":?" makes Compose abort when the variable is unset or empty instead
      # of silently passing an empty admin password to Grafana.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_AUTH_ANONYMOUS_ENABLED: "false"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    networks:
      - monitoring_net
      - frontend_net
    expose:
      - "3000"
    security_opt:
      - no-new-privileges:true
    depends_on:
      - prometheus
      - loki
    labels:
      - traefik.enable=true
      - traefik.docker.network=frontend_net
      - traefik.http.routers.monitoring-grafana.rule=Host(`monitoring.kaleschke.info`)
      - traefik.http.routers.monitoring-grafana.entrypoints=websecure
      - traefik.http.routers.monitoring-grafana.tls=true
      - traefik.http.routers.monitoring-grafana.tls.certresolver=le
      - traefik.http.routers.monitoring-grafana.middlewares=authelia@docker,secure-headers@file
      - traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000
      # Docker-provider Authelia middleware requested for this stack.
      - traefik.http.middlewares.authelia.forwardauth.address=http://authelia:9091/api/authz/forward-auth
      - traefik.http.middlewares.authelia.forwardauth.trustForwardHeader=true
      - traefik.http.middlewares.authelia.forwardauth.authResponseHeaders=Remote-User,Remote-Groups,Remote-Name,Remote-Email

  # One-shot job: waits for Grafana, then imports a fixed list of grafana.com
  # dashboards through the Grafana HTTP API. Runs once per `up` (restart: "no").
  grafana-dashboard-importer:
    image: python:3.13-alpine
    container_name: monitoring-grafana-dashboard-importer
    restart: "no"
    environment:
      # Same fail-fast rule as the grafana service: refuse to start without it.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
    networks:
      - monitoring_net
      - frontend_net
    security_opt:
      - no-new-privileges:true
    depends_on:
      - grafana
    # NOTE: Compose interpolates dollar signs in this script; keep it free of
    # them (or escape each one by doubling it).
    command:
      - /bin/sh
      - -c
      - |
        python - <<'PY'
        import base64
        import json
        import os
        import time
        import urllib.error
        import urllib.request

        grafana_url = "http://grafana:3000"
        password = os.environ["GF_SECURITY_ADMIN_PASSWORD"]
        auth = base64.b64encode(f"admin:{password}".encode()).decode()
        headers = {
            "Authorization": f"Basic {auth}",
            "Content-Type": "application/json",
        }
        # Provisioned datasource names keyed by Grafana plugin id.
        datasource_by_plugin = {"prometheus": "Prometheus", "loki": "Loki"}


        def request(path, payload=None, timeout=20):
            """GET (no payload) or POST (JSON payload) against Grafana; returns parsed JSON."""
            data = None if payload is None else json.dumps(payload).encode()
            req = urllib.request.Request(f"{grafana_url}{path}", data=data, headers=headers)
            if payload is not None:
                req.method = "POST"
            with urllib.request.urlopen(req, timeout=timeout) as response:
                body = response.read()
            return json.loads(body.decode() or "{}") if body else {}


        # Poll the health endpoint for up to ~2 minutes; depends_on only orders
        # container start, it does not wait for Grafana to accept requests.
        for _ in range(60):
            try:
                request("/api/health", timeout=5)
                break
            except Exception:
                time.sleep(2)
        else:
            raise SystemExit("Grafana did not become ready in time")

        # (grafana.com dashboard id, datasource used when the plugin id is unknown)
        dashboards = [
            (1860, "Prometheus"),
            (14282, "Prometheus"),
            (17346, "Prometheus"),
            (13639, "Loki"),
        ]

        for dashboard_id, default_datasource in dashboards:
            with urllib.request.urlopen(
                f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download",
                timeout=30,
            ) as response:
                dashboard = json.loads(response.read().decode())

            inputs = []
            for item in dashboard.get("__inputs", []):
                input_type = item.get("type", "datasource")
                if input_type == "datasource":
                    # Pick the datasource by the plugin the input asks for, so a
                    # Prometheus input inside a Loki dashboard is not bound to Loki.
                    plugin_id = item.get("pluginId", default_datasource.lower())
                    value = datasource_by_plugin.get(plugin_id.lower(), default_datasource)
                    inputs.append({
                        "name": item.get("name"),
                        "type": input_type,
                        "pluginId": plugin_id,
                        "value": value,
                    })
                else:
                    # Constant-style inputs keep the value declared by the dashboard
                    # instead of being overwritten with a datasource name.
                    inputs.append({
                        "name": item.get("name"),
                        "type": input_type,
                        "value": item.get("value", ""),
                    })

            request("/api/dashboards/import", {
                "dashboard": dashboard,
                "overwrite": True,
                "inputs": inputs,
            })
            print(f"Imported Grafana dashboard {dashboard_id}")
        PY

  node-exporter:
    image: prom/node-exporter:v1.9.1
    container_name: monitoring-node-exporter
    restart: unless-stopped
    command:
      - --path.procfs=/host/proc
      - --path.sysfs=/host/sys
      - --path.rootfs=/rootfs
      # "$$" is the Compose escape for a literal "$"; the exporter receives
      # the regex with a single end-of-string anchor.
      - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/containers/storage/.+)($$|/)
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro,rslave
    networks:
      - monitoring_net
    expose:
      - "9100"
    security_opt:
      - no-new-privileges:true

  cadvisor:
    image: ghcr.io/google/cadvisor:v0.53.0
    container_name: monitoring-cadvisor
    restart: unless-stopped
    command:
      - --docker_only=true
      - --housekeeping_interval=30s
      - --store_container_labels=false
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    networks:
      - monitoring_net
    expose:
      - "8080"
    security_opt:
      - no-new-privileges:true
networks:
  # Bridge network created by this stack; every monitoring service joins it.
  monitoring_net:
    name: monitoring_net
    driver: bridge

  # Must already exist (external): this stack attaches to it but never
  # creates or removes it. Grafana's Traefik labels route over this network.
  frontend_net:
    external: true
# Named volumes with default driver settings; they hold the persistent state
# of the services that mount them.
volumes:
  prometheus_data: {}
  loki_data: {}
  promtail_positions: {}
  grafana_data: {}
@@ -0,0 +1,13 @@
# Grafana dashboard provisioning: a single file-based provider that loads
# dashboards from disk into the "Homelab" folder of org 1.
apiVersion: 1

providers:
  - name: Homelab Imports
    orgId: 1
    folder: Homelab
    type: file
    disableDeletion: false
    # Re-scan the directory every 5 minutes.
    updateIntervalSeconds: 300
    allowUiUpdates: true
    options:
      # Grafana.com dashboard IDs are imported by the compose one-shot importer.
      # NOTE(review): no volume visible in the compose file populates this
      # directory - confirm it exists inside the container.
      path: /var/lib/grafana/dashboards
@@ -0,0 +1,19 @@
# Grafana datasource provisioning. Both entries are proxied through the
# Grafana backend and locked against edits in the UI (editable: false).
apiVersion: 1

datasources:
  # Default datasource; reached by its compose service name.
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false
    jsonData:
      # Same value as the 15s scrape_interval in the Prometheus config.
      timeInterval: 15s

  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100
    editable: false
    jsonData:
      maxLines: 1000
+47
View File
@@ -0,0 +1,47 @@
# Loki configuration: filesystem storage under /loki, in-memory ring,
# TSDB index, and compactor-driven retention of 720h.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    # Quoted so generic YAML tooling keeps the date as a string.
    - from: "2026-05-16"
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

limits_config:
  # 720h = 30 days; enforced by the compactor section below.
  retention_period: 720h
  allow_structured_metadata: true
  ingestion_rate_mb: 16
  ingestion_burst_size_mb: 32

compactor:
  working_directory: /loki/compactor
  compaction_interval: 10m
  retention_enabled: true
  retention_delete_delay: 2h
  delete_request_store: filesystem
+28
View File
@@ -0,0 +1,28 @@
# Prometheus configuration: 15s scrape/evaluation cadence and four static
# scrape jobs addressed by compose service name (plus Traefik on the host).
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    site: kallilabcore

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: prometheus
    static_configs:
      - targets: ['prometheus:9090']

  - job_name: node-exporter
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: cadvisor
    static_configs:
      - targets: ['cadvisor:8080']

  - job_name: traefik
    metrics_path: /metrics
    static_configs:
      # Expects Traefik metrics to be reachable on the Unraid host at port 8082.
      - targets: ['host.docker.internal:8082']
+34
View File
@@ -0,0 +1,34 @@
# Promtail configuration: discovers containers through the Docker socket,
# labels their streams, and pushes them to Loki.
server:
  http_listen_port: 9080
  # 0 as written in the original config; no fixed gRPC port is set.
  grpc_listen_port: 0

positions:
  filename: /positions/positions.yml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 10s
    relabel_configs:
      # Docker reports names with a leading slash; keep only the part after it.
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.+)'
        target_label: container
      - source_labels: ['__meta_docker_container_log_stream']
        target_label: stream
      - source_labels: ['__meta_docker_container_label_com_docker_compose_project']
        target_label: compose_project
      - source_labels: ['__meta_docker_container_label_com_docker_compose_service']
        target_label: compose_service
      # Docker json-file logs live under /var/lib/docker/containers/<id>/<id>-json.log.
      # NOTE(review): docker_sd_configs targets may be read through the Docker
      # API rather than from files - confirm this __path__ rule is still needed.
      - source_labels: ['__meta_docker_container_id']
        target_label: __path__
        replacement: '/var/lib/docker/containers/$1/$1-json.log'