---
# Prometheus alerting rules for the homelab.
# Two rule groups: endpoint availability (blackbox probes) and host/service health.
groups:
  # Alerts driven by the "blackbox-http" probe job.
  - name: homelab-availability
    rules:
      # Fires when a probe has reported failure (probe_success == 0) for 2 minutes.
      - alert: HomelabEndpointDown
        expr: probe_success{job="blackbox-http"} == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "{{ $labels.instance }} is not reachable"
          description: "Blackbox probe failed for {{ $labels.instance }}."

      # Fires when a probe has taken longer than 5 seconds for 5 minutes.
      - alert: HomelabEndpointSlow
        expr: probe_duration_seconds{job="blackbox-http"} > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "{{ $labels.instance }} is slow"
          description: "Blackbox probe duration is above 5 seconds for {{ $labels.instance }}."

  # Alerts built on node_* filesystem/memory metrics and Traefik request counters.
  - name: homelab-host
    rules:
      # Used-space percentage per filesystem, excluding tmpfs and overlay mounts;
      # fires when it stays above 85 for 10 minutes.
      - alert: HomelabDiskAlmostFull
        # Folded scalar: the two lines join with a single space into one expression.
        expr: >-
          100 * (1 - node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"}
          / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"}) > 85
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Disk usage high on {{ $labels.mountpoint }}"
          description: "{{ $labels.mountpoint }} is above 85% used."

      # Memory in use, computed as 1 - MemAvailable / MemTotal; fires when it
      # stays above 90% for 10 minutes.
      # NOTE(review): the annotations do not name the affected instance; confirm
      # that is acceptable if more than one host is scraped.
      - alert: HomelabHighMemoryUsage
        expr: 100 * (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 90
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Memory usage high"
          description: "Host memory usage is above 90%."

      # Per-service rate of requests with a 5xx status code over a 5-minute
      # window; fires when that rate stays above zero for 2 minutes.
      - alert: HomelabTraefik5xx
        expr: sum(rate(traefik_service_requests_total{code=~"5.."}[5m])) by (service) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Traefik 5xx responses for {{ $labels.service }}"
          description: "Traefik reports 5xx responses for {{ $labels.service }}."