Files
homelab-infra/services/posture-check/export-prometheus-textfile.sh

161 lines
6.1 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Renders homelab posture metrics in Prometheus text format into a temp file
# inside TEXTFILE_DIR. Every setting below may be overridden through the
# environment; the values shown are the defaults.
set -o errexit -o nounset -o pipefail

# Directory that receives the metrics file (and the temp file while writing).
: "${TEXTFILE_DIR:=/mnt/user/services/posture-check/textfile}"
# Final path of the metrics file.
: "${OUTPUT_FILE:=$TEXTFILE_DIR/homelab.prom}"
# Name of the container that hosts the Borg UI database.
: "${BORG_CONTAINER:=borg-ui}"

# Containers whose running state is exported. CRITICAL_CONTAINERS is a single
# space-separated string; the array only keeps the default list readable.
default_critical_containers=(
  traefik authelia postgresql17 gitea
  komodo-core komodo-mongo komodo-periphery
  vaultwarden borg-ui ntfy adguard unbound Tailscale-Docker
  monitoring-alertmanager monitoring-alertmanager-ntfy-bridge
  monitoring-blackbox-exporter monitoring-cadvisor monitoring-grafana
  monitoring-loki monitoring-node-exporter monitoring-promtail
  immich_server immich_postgres immich_redis
  paperless-ngx
  nextcloud nextcloud-postgres nextcloud-redis
  mealie mealie-postgres
)
if [ -z "${CRITICAL_CONTAINERS:-}" ]; then
  CRITICAL_CONTAINERS="${default_critical_containers[*]}"
fi
unset default_critical_containers

mkdir -p "${TEXTFILE_DIR}"
# The temp file is created next to the final file; its name does not end in
# ".prom".
tmp="$(mktemp "${TEXTFILE_DIR}/homelab.prom.XXXXXX")"
# Delete the in-progress temp file named by the global $tmp. Succeeds even
# when the file is already gone (rm -f).
cleanup() { rm -f "$tmp"; }
# Arm the cleanup handler first so that any later failure still removes the
# temp file, then capture the run time as Unix epoch seconds.
trap 'cleanup' EXIT
now=$(date '+%s')
#######################################
# Emit homelab_gitops_runtime_image_match samples for running Compose
# containers.
#
# For every running container that carries Compose labels, the image declared
# for its service in the rendered Compose config is compared with the
# container's configured image reference (.Config.Image):
# 1 = identical strings, 0 = different.
# A container is skipped when its service or config-file label is empty, when
# the first listed Compose file does not exist on this host, when the rendered
# config declares no image for the service, or when the container cannot be
# inspected.
#
# Globals:   none
# Arguments: none
# Outputs:   HELP/TYPE header plus one sample per comparable container on
#            stdout. Only the header is written when jq is not installed.
# Returns:   0 when jq is missing; otherwise the status of the
#            `docker ps | while` pipeline.
#######################################
emit_gitops_runtime_image_metrics() {
  local container project service config_files env_file
  local config_file expected running match
  local -a config_list compose_args

  cat <<'EOF'
# HELP homelab_gitops_runtime_image_match Whether a running Compose container uses the image currently declared by its Compose config.
# TYPE homelab_gitops_runtime_image_match gauge
EOF

  # jq is needed to read the rendered config; without it only the header
  # above is emitted.
  if ! command -v jq >/dev/null 2>&1; then
    return 0
  fi

  docker ps \
    --filter label=com.docker.compose.project \
    --format '{{.Names}}\t{{.Label "com.docker.compose.project"}}\t{{.Label "com.docker.compose.service"}}\t{{.Label "com.docker.compose.project.config_files"}}\t{{.Label "com.docker.compose.project.environment_file"}}' |
    while IFS=$'\t' read -r container project service config_files env_file; do
      [ -n "$container" ] || continue
      [ -n "$service" ] || continue
      [ -n "$config_files" ] || continue

      # The label is a comma-separated list of Compose files. Later files
      # override earlier ones, so every listed file has to be passed to
      # `docker compose`; using only the first one reports a false mismatch
      # whenever an override file sets the image.
      IFS=',' read -r -a config_list <<<"$config_files"
      # Same guard as before: the primary file must exist on this host.
      [ -f "${config_list[0]:-}" ] || continue
      compose_args=()
      for config_file in "${config_list[@]}"; do
        if [ -f "$config_file" ]; then
          compose_args+=(-f "$config_file")
        fi
      done
      if [ -n "$env_file" ] && [ -f "$env_file" ]; then
        compose_args+=(--env-file "$env_file")
      fi

      # stdin is redirected for the docker calls so they can never consume
      # the container list this loop is still reading. Errors are swallowed
      # on purpose: an unreadable config simply yields no sample.
      expected="$(
        docker compose "${compose_args[@]}" config --format json 2>/dev/null </dev/null |
          jq -r --arg service "$service" '.services[$service].image // empty' 2>/dev/null || true
      )"
      [ -n "$expected" ] || continue

      running="$(docker inspect -f '{{.Config.Image}}' "$container" 2>/dev/null </dev/null || true)"
      [ -n "$running" ] || continue

      match="0"
      if [ "$running" = "$expected" ]; then
        match="1"
      fi
      printf 'homelab_gitops_runtime_image_match{name="%s",project="%s",service="%s"} %s\n' \
        "$container" "$project" "$service" "$match"
    done
}
# Render every metric into the temp file. Any failing command inside the group
# aborts the script (set -e), so a partially rendered file is never published.
{
  cat <<'EOF'
# HELP homelab_textfile_exporter_last_run_timestamp_seconds Unix timestamp of the last successful homelab textfile exporter run.
# TYPE homelab_textfile_exporter_last_run_timestamp_seconds gauge
EOF
  printf 'homelab_textfile_exporter_last_run_timestamp_seconds %s\n' "$now"

  cat <<'EOF'
# HELP homelab_critical_container_running Whether a critical container is currently running according to docker inspect.
# TYPE homelab_critical_container_running gauge
EOF
  # CRITICAL_CONTAINERS is a whitespace-separated list; the unquoted expansion
  # is intentional so that it splits into one name per iteration.
  for container in $CRITICAL_CONTAINERS; do
    running="0"
    # An unknown container makes `docker inspect` fail, which counts as 0.
    if docker inspect -f '{{.State.Running}}' "$container" 2>/dev/null | grep -qx true; then
      running="1"
    fi
    printf 'homelab_critical_container_running{name="%s"} %s\n' "$container" "$running"
  done

  emit_gitops_runtime_image_metrics

  cat <<'EOF'
# HELP homelab_borg_last_completed_timestamp_seconds Unix timestamp of the most recent completed Borg backup job known to Borg UI.
# TYPE homelab_borg_last_completed_timestamp_seconds gauge
# HELP homelab_borg_last_success Whether the most recent Borg backup job completed successfully.
# TYPE homelab_borg_last_success gauge
# HELP homelab_borg_last_job_warning Whether the most recent Borg backup job completed with warnings.
# TYPE homelab_borg_last_job_warning gauge
EOF
  # Query Borg UI only while its container is actually running. `docker exec`
  # fails against a stopped container, which under `set -e` would abort the
  # whole export; a missing or stopped container now yields the fallback
  # samples in the else branch instead.
  if docker inspect -f '{{.State.Running}}' "$BORG_CONTAINER" 2>/dev/null | grep -qx true; then
    # The program is fed to python3 on stdin. If it fails, the script aborts
    # (set -e) and the previously published metrics file stays in place.
    docker exec -i "$BORG_CONTAINER" python3 - <<'PY'
import contextlib
import datetime as dt
import sqlite3


def parse_ts(value):
    """Return Unix seconds for an ISO-8601 or 'YYYY-MM-DD HH:MM:SS' string.

    Empty or unparsable input yields 0.
    """
    if not value:
        return 0
    # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
    value = value.replace("Z", "+00:00")
    try:
        parsed = dt.datetime.fromisoformat(value)
    except ValueError:
        try:
            parsed = dt.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
        except ValueError:
            return 0
    # Timestamps without an offset are treated as UTC.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=dt.timezone.utc)
    return int(parsed.timestamp())


def escape_label(value):
    """Escape a string for use as a Prometheus label value.

    Backslash, double quote and line feed are the characters the text
    exposition format requires to be escaped. None becomes "".
    """
    return (
        (value or "")
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


# mode=ro opens the application's database read-only, so this probe can
# neither modify it nor create an empty file when it is missing.
with contextlib.closing(
    sqlite3.connect("file:/data/borg.db?mode=ro", uri=True)
) as conn:
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()

    # Most recently started job, whatever its outcome.
    latest = cur.execute("""
        select status, completed_at, archive_name
        from backup_jobs
        order by coalesce(started_at, created_at) desc
        limit 1
    """).fetchone()

    # Most recently completed job (with or without warnings).
    completed = cur.execute("""
        select completed_at, archive_name
        from backup_jobs
        where status in ('completed', 'completed_with_warnings')
        and completed_at is not null
        order by completed_at desc
        limit 1
    """).fetchone()

    latest_status = latest["status"] if latest else "missing"
    latest_success = 1 if latest_status in ("completed", "completed_with_warnings") else 0
    latest_warning = 1 if latest_status == "completed_with_warnings" else 0
    completed_ts = parse_ts(completed["completed_at"]) if completed else 0

    # The status comes from the database as well, so it is escaped like the
    # archive names. str() keeps a NULL status rendering as "None".
    status_label = escape_label(str(latest_status))
    latest_archive = escape_label(latest["archive_name"] if latest else "")
    completed_archive = escape_label(completed["archive_name"] if completed else "")

    print(f'homelab_borg_last_success{{status="{status_label}",archive="{latest_archive}"}} {latest_success}')
    print(f'homelab_borg_last_job_warning{{status="{status_label}",archive="{latest_archive}"}} {latest_warning}')
    print(f'homelab_borg_last_completed_timestamp_seconds{{archive="{completed_archive}"}} {completed_ts}')
PY
  else
    printf 'homelab_borg_last_success{status="container_missing",archive=""} 0\n'
    printf 'homelab_borg_last_job_warning{status="container_missing",archive=""} 0\n'
    printf 'homelab_borg_last_completed_timestamp_seconds{archive=""} 0\n'
  fi
} > "$tmp"
# Use 0644 instead of mktemp's default 0600 so that the node-exporter textfile
# collector (runs as nobody:65534) can read the file.
chmod 644 "$tmp"
# Publish the finished file under its final name. The temp file was created in
# TEXTFILE_DIR, so with the default OUTPUT_FILE this is a rename within one
# directory rather than a copy.
mv "$tmp" "$OUTPUT_FILE"
# The temp file no longer exists under its old name; drop the EXIT trap.
trap - EXIT
# Report the path of the written metrics file on stdout.
printf '%s\n' "$OUTPUT_FILE"