#!/usr/bin/env python3 """ check_health.py — Homelab Alert Enricher ========================================= Laedt services.json, prueft Services via HTTP(S) und gibt einen strukturierten JSON-Report aus. Hermes nutzt diesen Report fuer angereicherte ntfy-Alerts. Keine externen Abhaengigkeiten — nur Python-Standardbibliothek. Kein Docker CLI, kein Root, kein pip. Check-Strategie: - Services MIT url: HTTP GET, 2xx/3xx = healthy, Timeout/4xx/5xx = unhealthy - Services OHNE url: "internal" — kein externer Check moeglich - Dump-Timestamps: werden gelesen falls /mnt/user/... erreichbar ist (optional) Verwendung: python3 check_health.py # alle Services pruefen (Tier 1+2) python3 check_health.py paperless-ngx # gezielt einen Service pruefen python3 check_health.py --summary # Gesamtstatus Tier 1+2 python3 check_health.py --all # alle Tiers inkl. Tier 3 Pfad auf der Hermes-VM: /srv/hermes-workspace/homelab-infra/ops/hermes-agent/scripts/check_health.py """ import json import ssl import sys import urllib.request import urllib.error from datetime import datetime from pathlib import Path # --------------------------------------------------------------------------- # Konfiguration # --------------------------------------------------------------------------- SCRIPT_DIR = Path(__file__).parent.resolve() SERVICES_JSON_PATH = SCRIPT_DIR.parent / "services.json" SERVICES_JSON_FALLBACK = Path("/srv/hermes-workspace/homelab-infra/ops/hermes-agent/services.json") # HTTP-Check Timeout in Sekunden HTTP_TIMEOUT = 8 # Dump-Verzeichnis (optional — wird uebersprungen wenn nicht erreichbar) DUMP_BASE_PATHS = [ Path("/mnt/user/backups/borg/dumps/latest"), # Unraid direkt Path("/opt/dumps"), # gemounteter Fallback ] # Dump-Warnschwelle DUMP_WARN_HOURS = 26 # SSL-Verification (True = strikt, False = ignoriert selbstsignierte Zerts) SSL_VERIFY = False # --------------------------------------------------------------------------- # Hilfsfunktionen # --------------------------------------------------------------------------- def load_services(): """Laedt services.json ohne externe Abhaengigkeiten.""" path = SERVICES_JSON_PATH if SERVICES_JSON_PATH.exists() else SERVICES_JSON_FALLBACK if not path.exists(): raise FileNotFoundError( f"services.json nicht gefunden: {path}\n" "Bitte 'git pull' in /srv/hermes-workspace/homelab-infra/ ausfuehren." ) with open(path, encoding="utf-8") as f: data = json.load(f) return data.get("services", {}), data.get("meta", {}) def http_check(url: str) -> dict: """ Fuehrt einen HTTP GET gegen url aus. Gibt {'reachable': bool, 'status_code': int|None, 'error': str|None} zurueck. 2xx und 3xx gelten als healthy. 401/403 auch (Service laeuft, Auth blockiert). """ ctx = ssl.create_default_context() if not SSL_VERIFY: ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE try: req = urllib.request.Request(url, method="GET", headers={"User-Agent": "hermes-healthcheck/1.0"}) with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT, context=ctx) as resp: code = resp.status healthy = code < 500 return {"reachable": healthy, "status_code": code, "error": None} except urllib.error.HTTPError as e: # 4xx = Service laeuft, aber Auth oder Not Found — trotzdem erreichbar healthy = e.code < 500 return {"reachable": healthy, "status_code": e.code, "error": None} except urllib.error.URLError as e: return {"reachable": False, "status_code": None, "error": str(e.reason)} except Exception as e: return {"reachable": False, "status_code": None, "error": str(e)} def check_service(service: dict) -> dict: """ Prueft einen einzelnen Service. Gibt {'healthy': bool, 'method': str, 'detail': dict} zurueck. """ url = service.get("url") if url: result = http_check(url) return { "healthy": result["reachable"], "method": "http", "url": url, "status_code": result["status_code"], "error": result["error"], } else: return { "healthy": None, # None = unbekannt (intern, kein externer Check) "method": "internal", "url": None, "status_code": None, "error": "Kein externer Check — interner Service ohne URL", } def find_dump_base() -> Path | None: """Sucht das Dump-Verzeichnis in bekannten Pfaden.""" for p in DUMP_BASE_PATHS: if p.exists(): return p return None def get_dump_info(dump_file: str | None, dump_base: Path | None) -> dict | None: """Liest Alter und Groesse einer Dump-Datei.""" if not dump_file or not dump_base: return None path = dump_base / dump_file if not path.exists(): return {"file": dump_file, "exists": False, "age_hours": None, "size_mb": None, "warn": False} stat = path.stat() age_hours = round((datetime.now().timestamp() - stat.st_mtime) / 3600, 1) size_mb = round(stat.st_size / 1_048_576, 1) return { "file": dump_file, "exists": True, "age_hours": age_hours, "size_mb": size_mb, "warn": age_hours > DUMP_WARN_HOURS, } # --------------------------------------------------------------------------- # Report-Generierung # --------------------------------------------------------------------------- def build_service_report(service_key: str, service: dict, all_services: dict) -> dict: """Vollstaendiger Report fuer einen einzelnen Service inkl. Abhaengigkeiten.""" dump_base = find_dump_base() # Eigener Status own = check_service(service) # Abhaengigkeiten pruefen dep_results = {} for dep_key in service.get("dependencies", []): dep = all_services.get(dep_key) if not dep: dep_results[dep_key] = {"healthy": None, "method": "unknown", "error": "Nicht in services.json"} continue dep_results[dep_key] = { "tier": dep.get("tier"), "container_name": dep.get("container_name"), **check_service(dep), } # Unhealthy Deps: nur die die definitiv False sind (None = intern = ignorieren) unhealthy_deps = [k for k, v in dep_results.items() if v.get("healthy") is False] # Dump-Info dump_info = get_dump_info(service.get("dump_file"), dump_base) return { "service": service_key, "description": service.get("description", ""), "tier": service.get("tier"), "url": service.get("url"), "status": own, "dependencies": dep_results, "unhealthy_deps": unhealthy_deps, "dump": dump_info, "first_check": service.get("first_check", ""), "notes": service.get("notes", ""), "timestamp": datetime.now().isoformat(), } def build_summary_report(all_services: dict, include_tier3: bool = False) -> dict: """Prueft alle Tier-1 und Tier-2 Services (optional auch Tier-3).""" dump_base = find_dump_base() issues = [] results = {} for key, svc in all_services.items(): tier = svc.get("tier", 3) if not include_tier3 and tier > 2: continue status = check_service(svc) healthy = status.get("healthy") results[key] = { "tier": tier, "method": status["method"], "healthy": healthy, "status_code": status.get("status_code"), "error": status.get("error"), } # Nur echte Fehler als Issue zaehlen (None = intern, nicht pruefbar) if healthy is False: issues.append({ "service": key, "tier": tier, "url": svc.get("url"), **status, }) # Dump-Checks stale_dumps = [] for key, svc in all_services.items(): info = get_dump_info(svc.get("dump_file"), dump_base) if info and info.get("warn"): stale_dumps.append({ "service": key, "file": info["file"], "age_hours": info["age_hours"], }) dump_available = dump_base is not None return { "mode": "summary", "timestamp": datetime.now().isoformat(), "dump_base_found": dump_available, "services_checked": len(results), "issues": issues, "stale_dumps": stale_dumps, "overall_healthy": len(issues) == 0, "note": "Interne Services ohne URL konnten nicht geprueft werden." if any( v["method"] == "internal" for v in results.values() ) else "", } # --------------------------------------------------------------------------- # Einstiegspunkt # --------------------------------------------------------------------------- def main(): args = sys.argv[1:] all_services, _ = load_services() include_all = "--all" in args summary_mode = "--summary" in args if summary_mode or not args or args[0].startswith("--"): report = build_summary_report(all_services, include_tier3=include_all) print(json.dumps(report, indent=2, ensure_ascii=False)) return # Gezielter Service-Key service_key = args[0] service = all_services.get(service_key) if not service: print(json.dumps({"error": f"Service '{service_key}' nicht in services.json gefunden."})) sys.exit(1) report = build_service_report(service_key, service, all_services) print(json.dumps(report, indent=2, ensure_ascii=False)) if __name__ == "__main__": main()