ops: add monitoring compose stack
This commit is contained in:
@@ -0,0 +1,234 @@
|
||||
services:
|
||||
prometheus:
  # Metrics store for the stack. Port 9090 is only exposed to other
  # containers (no host port is published).
  container_name: monitoring-prometheus
  image: prom/prometheus:v3.7.3
  restart: unless-stopped
  # Start the exporters first so their scrape targets exist.
  depends_on:
    - node-exporter
    - cadvisor
  security_opt:
    - 'no-new-privileges:true'
  networks:
    - monitoring_net
  expose:
    - '9090'
  extra_hosts:
    # Lets Prometheus scrape host-published Traefik metrics on Unraid.
    - 'host.docker.internal:host-gateway'
  volumes:
    # Scrape configuration is bind-mounted read-only; TSDB data lives in a
    # named volume.
    - './prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro'
    - 'prometheus_data:/prometheus'
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--storage.tsdb.path=/prometheus'
    # Keep 30 days of samples.
    - '--storage.tsdb.retention.time=30d'
    - '--web.enable-lifecycle'
|
||||
|
||||
loki:
  # Log store. Port 3100 is only exposed to other containers.
  container_name: monitoring-loki
  image: grafana/loki:3.7.2
  restart: unless-stopped
  security_opt:
    - 'no-new-privileges:true'
  networks:
    - monitoring_net
  expose:
    - '3100'
  volumes:
    # Configuration is bind-mounted read-only; /loki is backed by a named
    # volume.
    - './loki/loki-config.yml:/etc/loki/loki-config.yml:ro'
    - 'loki_data:/loki'
  command:
    - '-config.file=/etc/loki/loki-config.yml'
|
||||
|
||||
promtail:
  # Log shipper: reads its configuration from the bind mount below and is
  # started after Loki.
  container_name: monitoring-promtail
  image: grafana/promtail:3.6.10
  restart: unless-stopped
  depends_on:
    - loki
  security_opt:
    - 'no-new-privileges:true'
  networks:
    - monitoring_net
  volumes:
    - './promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro'
    # Named volume mounted at /positions.
    - 'promtail_positions:/positions'
    # Read-only access to the Docker socket and the container log directory.
    - '/var/run/docker.sock:/var/run/docker.sock:ro'
    - '/var/lib/docker/containers:/var/lib/docker/containers:ro'
  command:
    - '-config.file=/etc/promtail/promtail-config.yml'
|
||||
|
||||
grafana:
  # Grafana UI, routed through Traefik on frontend_net and reading from
  # Prometheus and Loki on monitoring_net.
  image: grafana/grafana:12.4.3
  container_name: monitoring-grafana
  restart: unless-stopped
  environment:
    GF_SERVER_ROOT_URL: https://monitoring.kaleschke.info/
    GF_SECURITY_ADMIN_USER: admin
    # `:?` makes `docker compose up` abort when the variable is unset or
    # empty, instead of silently handing Grafana an empty admin password.
    GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
    GF_USERS_ALLOW_SIGN_UP: "false"
    GF_AUTH_ANONYMOUS_ENABLED: "false"
  volumes:
    - grafana_data:/var/lib/grafana
    - ./grafana/provisioning:/etc/grafana/provisioning:ro
  networks:
    - monitoring_net
    - frontend_net
  expose:
    - "3000"
  security_opt:
    - no-new-privileges:true
  depends_on:
    - prometheus
    - loki
  labels:
    - traefik.enable=true
    # Traefik must reach the container over the shared frontend network.
    - traefik.docker.network=frontend_net
    - traefik.http.routers.monitoring-grafana.rule=Host(`monitoring.kaleschke.info`)
    - traefik.http.routers.monitoring-grafana.entrypoints=websecure
    - traefik.http.routers.monitoring-grafana.tls=true
    - traefik.http.routers.monitoring-grafana.tls.certresolver=le
    - traefik.http.routers.monitoring-grafana.middlewares=authelia@docker,secure-headers@file
    - traefik.http.services.monitoring-grafana.loadbalancer.server.port=3000
    # Docker-provider Authelia middleware requested for this stack.
    - traefik.http.middlewares.authelia.forwardauth.address=http://authelia:9091/api/authz/forward-auth
    - traefik.http.middlewares.authelia.forwardauth.trustForwardHeader=true
    - traefik.http.middlewares.authelia.forwardauth.authResponseHeaders=Remote-User,Remote-Groups,Remote-Name,Remote-Email
|
||||
|
||||
grafana-dashboard-importer:
  # One-shot job: waits for Grafana, downloads a fixed list of grafana.com
  # dashboards and imports them through the Grafana HTTP API.
  image: python:3.13-alpine
  container_name: monitoring-grafana-dashboard-importer
  restart: "no"
  environment:
    # `:?` makes `docker compose up` abort when the variable is unset or
    # empty; the script below needs it for basic auth.
    GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
  networks:
    - monitoring_net
    - frontend_net
  security_opt:
    - no-new-privileges:true
  depends_on:
    - grafana
  # NOTE: Compose interpolates dollar signs in this script; keep it free of
  # them (or double them) when editing.
  command:
    - /bin/sh
    - -c
    - |
      python - <<'PY'
      import base64
      import json
      import os
      import time
      import urllib.error
      import urllib.request

      grafana_url = "http://grafana:3000"
      password = os.environ["GF_SECURITY_ADMIN_PASSWORD"]
      auth = base64.b64encode(f"admin:{password}".encode()).decode()
      headers = {
          "Authorization": f"Basic {auth}",
          "Content-Type": "application/json",
      }

      # Datasource plugin id -> name of the Grafana datasource to bind.
      datasource_names = {"prometheus": "Prometheus", "loki": "Loki"}


      def request(path, payload=None, timeout=20):
          """Call the Grafana API; POST when a payload is given, else GET."""
          data = None if payload is None else json.dumps(payload).encode()
          req = urllib.request.Request(f"{grafana_url}{path}", data=data, headers=headers)
          if payload is not None:
              req.method = "POST"
          with urllib.request.urlopen(req, timeout=timeout) as response:
              body = response.read()
          return json.loads(body.decode() or "{}") if body else {}


      def build_inputs(dashboard, default_datasource):
          """Translate the dashboard's __inputs into import API inputs.

          Datasource inputs are bound by their plugin id (falling back to
          default_datasource); any other input type, such as a constant,
          keeps the value shipped with the dashboard instead of being
          overwritten with a datasource name.
          """
          inputs = []
          for item in dashboard.get("__inputs", []):
              input_type = item.get("type", "datasource")
              entry = {"name": item.get("name"), "type": input_type}
              if input_type == "datasource":
                  plugin_id = item.get("pluginId") or default_datasource.lower()
                  entry["pluginId"] = plugin_id
                  entry["value"] = datasource_names.get(plugin_id.lower(), default_datasource)
              else:
                  entry["pluginId"] = item.get("pluginId", "")
                  entry["value"] = item.get("value", "")
              inputs.append(entry)
          return inputs


      # Poll the health endpoint for up to 60 attempts, 2 seconds apart.
      for _ in range(60):
          try:
              request("/api/health", timeout=5)
              break
          except Exception:
              time.sleep(2)
      else:
          raise SystemExit("Grafana did not become ready in time")

      # (grafana.com dashboard id, datasource used when an input names none)
      dashboards = [
          (1860, "Prometheus"),
          (14282, "Prometheus"),
          (17346, "Prometheus"),
          (13639, "Loki"),
      ]

      failed = []
      for dashboard_id, default_datasource in dashboards:
          try:
              with urllib.request.urlopen(
                  f"https://grafana.com/api/dashboards/{dashboard_id}/revisions/latest/download",
                  timeout=30,
              ) as response:
                  dashboard = json.loads(response.read().decode())

              request("/api/dashboards/import", {
                  "dashboard": dashboard,
                  "overwrite": True,
                  "inputs": build_inputs(dashboard, default_datasource),
              })
          except Exception as exc:
              # Keep going so one broken dashboard does not block the rest.
              failed.append(dashboard_id)
              print(f"Failed to import Grafana dashboard {dashboard_id}: {exc}")
              continue
          print(f"Imported Grafana dashboard {dashboard_id}")

      if failed:
          raise SystemExit(f"Dashboard import failed for: {failed}")
      PY
|
||||
|
||||
node-exporter:
  # Host metrics exporter. The host's /proc, /sys and / are bind-mounted
  # read-only and passed to the exporter through the --path.* flags.
  image: prom/node-exporter:v1.9.1
  container_name: monitoring-node-exporter
  restart: unless-stopped
  command:
    - --path.procfs=/host/proc
    - --path.sysfs=/host/sys
    - --path.rootfs=/rootfs
    # Compose reserves the dollar sign for variable interpolation, so the
    # regex end-of-string anchor is written as a doubled dollar sign; the
    # container still receives a single one.
    - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run|var/lib/docker/.+|var/lib/containers/storage/.+)($$|/)
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro,rslave
  networks:
    - monitoring_net
  expose:
    - "9100"
  security_opt:
    - no-new-privileges:true
|
||||
|
||||
cadvisor:
  # Container metrics exporter. Port 8080 is only exposed to other
  # containers.
  container_name: monitoring-cadvisor
  image: ghcr.io/google/cadvisor:v0.53.0
  restart: unless-stopped
  security_opt:
    - 'no-new-privileges:true'
  networks:
    - monitoring_net
  expose:
    - '8080'
  volumes:
    # Host paths, all mounted read-only.
    - '/:/rootfs:ro'
    - '/var/run:/var/run:ro'
    - '/sys:/sys:ro'
    - '/var/lib/docker:/var/lib/docker:ro'
    - '/dev/disk:/dev/disk:ro'
  command:
    - '--docker_only=true'
    - '--housekeeping_interval=30s'
    - '--store_container_labels=false'
|
||||
|
||||
networks:
  # Pre-existing network managed outside this stack; Grafana and the
  # dashboard importer attach to it.
  frontend_net:
    external: true
  # Stack-internal bridge network with a fixed name.
  monitoring_net:
    driver: bridge
    name: monitoring_net
|
||||
|
||||
# Named volumes with default settings, one per stateful service.
volumes:
  prometheus_data: {}
  loki_data: {}
  promtail_positions: {}
  grafana_data: {}
|
||||
@@ -0,0 +1,13 @@
|
||||
# Grafana dashboard provisioning: a single file-based provider.
apiVersion: 1

providers:
  - name: 'Homelab Imports'
    type: file
    orgId: 1
    folder: 'Homelab'
    allowUiUpdates: true
    disableDeletion: false
    # Rescan interval, in seconds.
    updateIntervalSeconds: 300
    options:
      # Grafana.com dashboard IDs are imported by the compose one-shot importer.
      # NOTE(review): confirm this directory exists inside the Grafana
      # container; nothing visible here creates or mounts it.
      path: '/var/lib/grafana/dashboards'
|
||||
@@ -0,0 +1,19 @@
|
||||
# Grafana datasource provisioning: Prometheus (default) and Loki, both
# proxied through the Grafana backend and locked against UI edits.
apiVersion: 1

datasources:
  - name: 'Prometheus'
    type: 'prometheus'
    url: 'http://prometheus:9090'
    access: 'proxy'
    isDefault: true
    editable: false
    jsonData:
      timeInterval: '15s'

  - name: 'Loki'
    type: 'loki'
    url: 'http://loki:3100'
    access: 'proxy'
    editable: false
    jsonData:
      maxLines: 1000
|
||||
@@ -0,0 +1,47 @@
|
||||
# Loki configuration: filesystem storage under /loki, in-memory ring,
# replication factor 1.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: '127.0.0.1'
  path_prefix: '/loki'
  replication_factor: 1
  ring:
    kvstore:
      store: 'inmemory'
  storage:
    filesystem:
      chunks_directory: '/loki/chunks'
      rules_directory: '/loki/rules'

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100

schema_config:
  configs:
    # Quoted so the start date stays a string for every YAML parser.
    - from: '2026-05-16'
      store: 'tsdb'
      object_store: 'filesystem'
      schema: 'v13'
      index:
        prefix: 'index_'
        period: '24h'

limits_config:
  # 720h = 30 days.
  retention_period: '720h'
  allow_structured_metadata: true
  ingestion_rate_mb: 16
  ingestion_burst_size_mb: 32

compactor:
  working_directory: '/loki/compactor'
  compaction_interval: '10m'
  retention_enabled: true
  retention_delete_delay: '2h'
  delete_request_store: 'filesystem'
|
||||
@@ -0,0 +1,28 @@
|
||||
# Prometheus scrape configuration for the monitoring stack.
global:
  scrape_interval: '15s'
  evaluation_interval: '15s'
  external_labels:
    site: 'kallilabcore'

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['prometheus:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']

  - job_name: 'traefik'
    metrics_path: '/metrics'
    static_configs:
      # Expects Traefik metrics to be reachable on the Unraid host at port 8082.
      - targets: ['host.docker.internal:8082']
|
||||
@@ -0,0 +1,34 @@
|
||||
# Promtail configuration: discovers containers through the Docker socket
# and pushes to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: '/positions/positions.yml'

clients:
  - url: 'http://loki:3100/loki/api/v1/push'

scrape_configs:
  - job_name: 'docker'
    docker_sd_configs:
      - host: 'unix:///var/run/docker.sock'
        refresh_interval: '10s'
    relabel_configs:
      # Capture the container name without its leading slash.
      - source_labels: ['__meta_docker_container_name']
        regex: '/(.+)'
        target_label: 'container'
      - source_labels: ['__meta_docker_container_log_stream']
        target_label: 'stream'
      - source_labels: ['__meta_docker_container_label_com_docker_compose_project']
        target_label: 'compose_project'
      - source_labels: ['__meta_docker_container_label_com_docker_compose_service']
        target_label: 'compose_service'
      # Docker json-file logs live under /var/lib/docker/containers/<id>/<id>-json.log.
      # NOTE(review): confirm that Docker service discovery actually uses
      # __path__; it may be reading logs through the Docker API instead.
      - source_labels: ['__meta_docker_container_id']
        target_label: '__path__'
        replacement: '/var/lib/docker/containers/$1/$1-json.log'
|
||||
Reference in New Issue
Block a user