Add custom homelab dashboard stack

This commit is contained in:
2026-04-05 13:43:03 +02:00
parent 450a04a7d3
commit 89b9173c25
38 changed files with 3539 additions and 0 deletions
@@ -0,0 +1 @@
"""External system clients."""
+107
View File
@@ -0,0 +1,107 @@
from __future__ import annotations
import logging
from typing import Any
import httpx
from app.config import Settings
logger = logging.getLogger(__name__)
class BaseHTTPClient:
def __init__(self, settings: Settings, name: str, base_url: str | None) -> None:
self.settings = settings
self.name = name
self.base_url = str(base_url).rstrip("/") if base_url else None
async def _request_json(
self,
method: str,
path: str,
*,
headers: dict[str, str] | None = None,
params: dict[str, Any] | None = None,
auth: tuple[str, str] | None = None,
) -> Any | None:
response = await self._request(
method,
path,
headers=headers,
params=params,
auth=auth,
)
if response is None:
return None
try:
return response.json()
except ValueError:
logger.warning("%s returned non-JSON payload for %s", self.name, path)
return None
async def _request_text(
self,
method: str,
path: str,
*,
headers: dict[str, str] | None = None,
params: dict[str, Any] | None = None,
auth: tuple[str, str] | None = None,
) -> str | None:
response = await self._request(
method,
path,
headers=headers,
params=params,
auth=auth,
)
if response is None:
return None
return response.text
async def _request(
self,
method: str,
path: str,
*,
headers: dict[str, str] | None = None,
params: dict[str, Any] | None = None,
auth: tuple[str, str] | None = None,
) -> httpx.Response | None:
if not self.base_url:
logger.info("%s client skipped because base URL is not configured", self.name)
return None
url = f"{self.base_url}/{path.lstrip('/')}"
try:
async with httpx.AsyncClient(
timeout=self.settings.request_timeout_seconds,
trust_env=False,
) as client:
response = await client.request(
method,
url,
headers=headers,
params=params,
auth=auth,
)
response.raise_for_status()
return response
except httpx.TimeoutException:
logger.warning("%s request timed out: %s %s", self.name, method, url)
except httpx.HTTPStatusError as exc:
logger.warning(
"%s request failed with status %s for %s %s",
self.name,
exc.response.status_code,
method,
url,
)
except httpx.HTTPError as exc:
logger.warning("%s request error for %s %s: %s", self.name, method, url, exc)
return None
@@ -0,0 +1,417 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
import logging
from typing import Any
from app.clients.base import BaseHTTPClient
from app.config import Settings
from app.models.sources import (
BeszelDiskMetric,
BeszelSystemSnapshot,
DockerContainerSummary,
DockerSnapshot,
)
logger = logging.getLogger(__name__)
class BeszelClient(BaseHTTPClient):
"""
Beszel exposes a PocketBase-backed REST API. The exact record schema may
change between Beszel releases, so this client intentionally normalizes
multiple likely field layouts into a stable internal snapshot.
"""
def __init__(self, settings: Settings) -> None:
super().__init__(settings, "beszel", settings.beszel_base_url)
self._admin_jwt: str | None = None
self._admin_jwt_expires_at: datetime | None = None
async def fetch_system_snapshot(self) -> BeszelSystemSnapshot:
snapshot = BeszelSystemSnapshot()
if not self.base_url:
logger.info("beszel skipped: base URL missing")
return snapshot
headers = await self._build_auth_headers()
if headers is None:
logger.warning("beszel skipped: no usable auth method configured")
return snapshot
payload = await self._request_json(
"GET",
"/api/collections/system_stats/records",
headers=headers,
params={"page": 1, "perPage": 1, "sort": "-created"},
)
if not payload:
logger.warning("beszel returned empty payload")
return snapshot
logger.info("beszel raw payload: %s", payload)
items = payload.get("items") if isinstance(payload, dict) else None
if not items:
logger.warning("beszel returned no system_stats records")
return snapshot
record = items[0]
details = await self._fetch_system_details(headers, record)
normalized = self._normalize_snapshot(record, details)
logger.info("beszel normalized snapshot: %s", normalized.model_dump())
return normalized
async def fetch_container_snapshot(self) -> DockerSnapshot:
snapshot = DockerSnapshot()
if not self.base_url:
return snapshot
headers = await self._build_auth_headers()
if headers is None:
return snapshot
payload = await self._request_json(
"GET",
"/api/collections/containers/records",
headers=headers,
params={"page": 1, "perPage": 200, "sort": "-updated"},
)
logger.info("beszel raw containers payload: %s", payload)
if not isinstance(payload, dict):
logger.warning("beszel containers mapping: payload is not a dict")
return snapshot
items = payload.get("items")
if not isinstance(items, list):
logger.warning("beszel containers mapping: items missing or not a list")
return snapshot
if not items:
logger.warning("beszel containers mapping: no container records returned")
return snapshot
containers: list[DockerContainerSummary] = []
running = 0
stopped = 0
unhealthy = 0
for item in items:
if not isinstance(item, dict):
continue
state = self._normalize_container_state(item)
if state == "running":
running += 1
elif state == "unhealthy":
unhealthy += 1
else:
stopped += 1
containers.append(
DockerContainerSummary(
id=str(item.get("id") or ""),
name=str(item.get("name") or item.get("container") or item.get("service") or "unknown"),
state=state,
status_text=str(item.get("status") or item.get("state") or "unknown"),
image=str(item.get("image") or ""),
health=str(item.get("health") or item.get("health_status") or "").lower() or None,
)
)
normalized = DockerSnapshot(
source_status="online",
running=running,
stopped=stopped,
unhealthy=unhealthy,
total=len(containers),
containers=containers,
)
logger.info("beszel normalized containers snapshot: %s", normalized.model_dump())
return normalized
def _normalize_snapshot(self, record: dict[str, Any], details: dict[str, Any] | None) -> BeszelSystemSnapshot:
stats = self._coerce_mapping(record.get("stats") or {})
details = self._coerce_mapping(details or {})
details_payload = self._coerce_mapping(
details.get("details")
or details.get("stats")
or details.get("info")
or details.get("data")
or {}
)
expanded_system = self._coerce_mapping(self._coerce_mapping(record.get("expand")).get("system"))
memory_used_gb = self._as_float(stats.get("m"))
memory_total_gb = self._as_float(stats.get("m"))
if stats.get("mp"):
memory_total_gb = round(memory_used_gb / (self._as_float(stats.get("mp")) / 100), 1) if self._as_float(stats.get("mp")) else memory_used_gb
memory_available_gb = max(round(memory_total_gb - memory_used_gb, 1), 0.0)
network_pair = stats.get("b") if isinstance(stats.get("b"), list) else []
disks = self._normalize_disks(
details_payload.get("disks")
or details_payload.get("disk")
or details_payload.get("mounts")
or details.get("disks")
or details.get("disk")
or details.get("mounts")
or []
)
if not disks:
logger.info("beszel storage unsupported: no disks/mounts in payload")
return BeszelSystemSnapshot(
source_status="online",
host_name=str(
details_payload.get("hostname")
or details_payload.get("host")
or details.get("hostname")
or details.get("host")
or expanded_system.get("name")
or record.get("system_name")
or record.get("name")
or "unknown"
),
agent_name="beszel-agent",
cpu_usage_percent=self._as_float(stats.get("cpu")),
cpu_cores=len(stats.get("cpus")) if isinstance(stats.get("cpus"), list) else 0,
load_1=0.0,
load_5=0.0,
load_15=0.0,
memory_used_gb=memory_used_gb,
memory_total_gb=memory_total_gb,
memory_available_gb=memory_available_gb,
memory_usage_percent=self._as_float(stats.get("mp")),
primary_interface=str(
details_payload.get("primary_interface")
or details_payload.get("interface")
or details.get("primary_interface")
or "primary"
),
network_rx_mbps=self._network_value_to_mbps(network_pair[0] if len(network_pair) > 0 else 0),
network_tx_mbps=self._network_value_to_mbps(network_pair[1] if len(network_pair) > 1 else 0),
uptime_seconds=self._minutes_to_seconds(stats.get("d")),
platform=str(
details_payload.get("platform")
or details_payload.get("os")
or details.get("platform")
or "unknown"
),
kernel=str(details_payload.get("kernel") or details.get("kernel") or "unknown"),
disks=disks,
)
async def _fetch_system_details(
self,
headers: dict[str, str],
stats_record: dict[str, Any],
) -> dict[str, Any] | None:
system_id = stats_record.get("system")
params: dict[str, Any] = {
"page": 1,
"perPage": 100,
"sort": "-created",
}
payload = await self._request_json(
"GET",
"/api/collections/system_details/records",
headers=headers,
params=params,
)
logger.info("beszel raw details payload: %s", payload)
if not isinstance(payload, dict):
logger.warning("beszel system_details mapping: payload is not a dict")
return None
items = payload.get("items")
if isinstance(items, list) and items:
if system_id:
for item in items:
if isinstance(item, dict) and str(item.get("system") or "") == str(system_id):
return item
return items[0]
logger.warning("beszel system_details mapping: no matching detail records returned")
return None
@staticmethod
def _normalize_container_state(item: dict[str, Any]) -> str:
raw = " ".join(
str(item.get(key) or "")
for key in ("status", "state", "health", "health_status")
).lower()
if "unhealthy" in raw or "degraded" in raw:
return "unhealthy"
if any(token in raw for token in ("running", "up", "healthy", "active")):
return "running"
if raw.strip():
return "stopped"
return "unknown"
async def _build_auth_headers(self) -> dict[str, str] | None:
admin_headers = await self._get_admin_auth_headers()
if admin_headers:
return admin_headers
if self.settings.beszel_api_token:
return {"Authorization": self.settings.beszel_api_token}
return None
async def _get_admin_auth_headers(self) -> dict[str, str] | None:
if not self.settings.beszel_admin_email or not self.settings.beszel_admin_password:
return None
now = datetime.now(timezone.utc)
if self._admin_jwt and self._admin_jwt_expires_at and now < self._admin_jwt_expires_at:
return {"Authorization": self._admin_jwt}
auth_payload = await self._request_json(
"POST",
"/api/collections/_superusers/auth-with-password",
headers={"Content-Type": "application/json"},
params=None,
)
if auth_payload is None:
auth_payload = await self._request_json_with_body(
"POST",
"/api/collections/_superusers/auth-with-password",
json_body={
"identity": self.settings.beszel_admin_email,
"password": self.settings.beszel_admin_password,
},
)
if not isinstance(auth_payload, dict):
logger.warning("beszel admin auth failed: no payload")
return None
token = auth_payload.get("token")
if not token:
logger.warning("beszel admin auth failed: token missing")
return None
self._admin_jwt = str(token)
self._admin_jwt_expires_at = now + timedelta(minutes=30)
logger.info("beszel admin auth succeeded")
return {"Authorization": self._admin_jwt}
async def _request_json_with_body(
self,
method: str,
path: str,
*,
json_body: dict[str, Any],
) -> Any | None:
if not self.base_url:
return None
import httpx
url = f"{self.base_url}/{path.lstrip('/')}"
try:
async with httpx.AsyncClient(
timeout=self.settings.request_timeout_seconds,
trust_env=False,
) as client:
response = await client.request(
method,
url,
json=json_body,
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
return response.json()
except httpx.TimeoutException:
logger.warning("beszel auth request timed out: %s %s", method, url)
except httpx.HTTPStatusError as exc:
logger.warning("beszel auth request failed with status %s for %s %s", exc.response.status_code, method, url)
try:
logger.info("beszel auth error payload: %s", exc.response.json())
except ValueError:
logger.info("beszel auth error text: %s", exc.response.text)
except httpx.HTTPError as exc:
logger.warning("beszel auth request error for %s %s: %s", method, url, exc)
return None
def _normalize_disks(self, raw_disks: Any) -> list[BeszelDiskMetric]:
if isinstance(raw_disks, dict):
raw_disks = [
{"mount": key, **(value if isinstance(value, dict) else {"used": value})}
for key, value in raw_disks.items()
]
disks: list[BeszelDiskMetric] = []
if not isinstance(raw_disks, list):
return disks
for item in raw_disks:
if not isinstance(item, dict):
continue
total_gb = self._bytes_to_gb(item.get("total") or item.get("total_bytes"))
used_gb = self._bytes_to_gb(item.get("used") or item.get("used_bytes"))
free_gb = self._bytes_to_gb(item.get("free") or item.get("free_bytes"))
usage_percent = self._as_float(item.get("usage_percent") or item.get("percent"))
if not total_gb and used_gb and free_gb:
total_gb = round(used_gb + free_gb, 1)
disks.append(
BeszelDiskMetric(
name=str(item.get("name") or item.get("device") or item.get("mount") or "disk"),
mount=str(item.get("mount") or item.get("path") or "/"),
used_gb=used_gb,
total_gb=total_gb,
free_gb=free_gb,
usage_percent=usage_percent,
)
)
return disks
@staticmethod
def _coerce_mapping(value: Any) -> dict[str, Any]:
return value if isinstance(value, dict) else {}
@staticmethod
def _as_float(value: Any) -> float:
try:
return round(float(value or 0), 1)
except (TypeError, ValueError):
return 0.0
@staticmethod
def _as_int(value: Any) -> int:
try:
return int(float(value or 0))
except (TypeError, ValueError):
return 0
@classmethod
def _bytes_to_gb(cls, value: Any) -> float:
if value in (None, ""):
return 0.0
try:
return round(float(value) / (1024 ** 3), 1)
except (TypeError, ValueError):
return 0.0
@classmethod
def _bytes_per_second_to_mbps(cls, value: Any) -> float:
if value in (None, ""):
return 0.0
try:
return round((float(value) * 8) / 1_000_000, 1)
except (TypeError, ValueError):
return 0.0
@classmethod
def _network_value_to_mbps(cls, value: Any) -> float:
try:
numeric = float(value or 0)
except (TypeError, ValueError):
return 0.0
if numeric <= 0:
return 0.0
return round((numeric * 8) / 1_000_000, 1)
@classmethod
def _minutes_to_seconds(cls, value: Any) -> int:
try:
return int(float(value or 0) * 60)
except (TypeError, ValueError):
return 0
@@ -0,0 +1,103 @@
from __future__ import annotations
import logging
from app.clients.beszel_client import BeszelClient
from app.clients.base import BaseHTTPClient
from app.config import Settings
from app.models.sources import DockerContainerSummary, DockerSnapshot
logger = logging.getLogger(__name__)
class DockerProxyClient(BaseHTTPClient):
def __init__(self, settings: Settings) -> None:
super().__init__(settings, "docker-proxy", settings.docker_proxy_base_url)
self.beszel_client = BeszelClient(settings)
async def fetch_containers(self) -> DockerSnapshot:
snapshot = DockerSnapshot()
payload = await self._request_json("GET", "/containers/json", params={"all": "true"})
if not isinstance(payload, list):
logger.warning("docker proxy returned non-list payload: %s", payload)
fallback = await self.beszel_client.fetch_container_snapshot()
if fallback.source_status == "online":
logger.info("docker proxy fallback to beszel containers succeeded")
return fallback
logger.warning(
"docker integration unavailable: docker proxy unreachable and beszel container fallback returned no usable data"
)
return snapshot
logger.info("docker proxy raw payload count: %s", len(payload))
logger.info("docker proxy raw payload sample: %s", payload[:3])
containers: list[DockerContainerSummary] = []
running = 0
stopped = 0
unhealthy = 0
for item in payload:
if not isinstance(item, dict):
continue
state = self._normalize_state(item)
if state == "running":
running += 1
elif state == "unhealthy":
unhealthy += 1
else:
stopped += 1
containers.append(
DockerContainerSummary(
id=str(item.get("Id") or item.get("ID") or ""),
name=self._normalize_name(item.get("Names")),
state=state,
status_text=str(item.get("Status") or item.get("State") or "unknown"),
image=str(item.get("Image") or ""),
health=self._extract_health(item),
)
)
normalized = DockerSnapshot(
source_status="online",
running=running,
stopped=stopped,
unhealthy=unhealthy,
total=len(containers),
containers=containers,
)
logger.info("docker proxy normalized snapshot: %s", normalized.model_dump())
return normalized
@staticmethod
def _normalize_name(names: object) -> str:
if isinstance(names, list) and names:
return str(names[0]).lstrip("/")
return "unknown"
@classmethod
def _normalize_state(cls, item: dict) -> str:
status_text = str(item.get("Status") or "").lower()
state = str(item.get("State") or "").lower()
health = cls._extract_health(item)
if health == "unhealthy" or "unhealthy" in status_text:
return "unhealthy"
if state == "running":
return "running"
if state:
return "stopped"
return "unknown"
@staticmethod
def _extract_health(item: dict) -> str | None:
if isinstance(item.get("Health"), str):
return str(item["Health"]).lower()
if isinstance(item.get("State"), dict):
health = item["State"].get("Health")
if isinstance(health, dict):
return str(health.get("Status") or "").lower() or None
return None
@@ -0,0 +1,53 @@
from __future__ import annotations
from datetime import datetime, timezone
import logging
from time import perf_counter
from app.clients.base import BaseHTTPClient
from app.config import Settings
from app.models.sources import HomeAssistantSnapshot
logger = logging.getLogger(__name__)
class HomeAssistantClient(BaseHTTPClient):
def __init__(self, settings: Settings) -> None:
super().__init__(settings, "home-assistant", settings.home_assistant_base_url)
async def fetch_status(self) -> HomeAssistantSnapshot:
snapshot = HomeAssistantSnapshot()
if not self.base_url or not self.settings.home_assistant_token:
logger.info("home assistant skipped: base URL or token missing")
return snapshot
headers = {
"Authorization": f"Bearer {self.settings.home_assistant_token}",
"Content-Type": "application/json",
}
started_at = perf_counter()
api_info = await self._request_json("GET", "/api/", headers=headers)
if api_info is None:
logger.warning("home assistant base API check failed")
return snapshot
logger.info("home assistant raw /api response: %s", api_info)
elapsed_ms = int((perf_counter() - started_at) * 1000)
config = await self._request_json("GET", "/api/config", headers=headers)
logger.info("home assistant raw /api/config response: %s", config)
version = None
if isinstance(config, dict):
version = config.get("version")
normalized = HomeAssistantSnapshot(
status="online",
version=str(version) if version else None,
response_time_ms=elapsed_ms,
last_checked=datetime.now(timezone.utc),
)
logger.info("home assistant normalized snapshot: %s", normalized.model_dump())
return normalized
@@ -0,0 +1,178 @@
from __future__ import annotations
import logging
import re
import httpx
from app.clients.base import BaseHTTPClient
from app.config import Settings
from app.models.sources import UptimeKumaMonitor, UptimeKumaSnapshot
METRIC_LINE_RE = re.compile(r'^(?P<name>[a-zA-Z_:][a-zA-Z0-9_:]*)\{(?P<labels>[^}]*)\}\s+(?P<value>.+)$')
LABEL_RE = re.compile(r'(\w+)="((?:[^"\\]|\\.)*)"')
logger = logging.getLogger(__name__)
class UptimeKumaClient(BaseHTTPClient):
"""
Reads Uptime Kuma monitor status from the documented /metrics endpoint.
This avoids coupling the backend to Socket.IO login flows, but still relies
on Kuma's internal metrics surface, which may change across releases.
"""
def __init__(self, settings: Settings) -> None:
super().__init__(settings, "uptime-kuma", settings.uptime_kuma_base_url)
async def fetch_monitors(self) -> UptimeKumaSnapshot:
snapshot = UptimeKumaSnapshot()
if not self.base_url:
logger.info("uptime kuma skipped: base URL missing")
return snapshot
raw_metrics = None
if self.settings.uptime_kuma_api_key:
raw_metrics = await self._request_metrics_with_mode(
"basic-api-key",
auth=(self.settings.uptime_kuma_api_key, ""),
)
if (
not raw_metrics
and self.settings.uptime_kuma_username
and self.settings.uptime_kuma_password
):
raw_metrics = await self._request_metrics_with_mode(
"basic-user",
auth=(self.settings.uptime_kuma_username, self.settings.uptime_kuma_password),
)
if raw_metrics is None and not (
self.settings.uptime_kuma_api_key
or (self.settings.uptime_kuma_username and self.settings.uptime_kuma_password)
):
logger.info("uptime kuma skipped: no usable metrics auth configured")
return snapshot
if not raw_metrics:
logger.warning("uptime kuma returned empty metrics payload or metrics auth failed")
return snapshot
logger.info("uptime kuma raw metrics first 40 lines: %s", raw_metrics.splitlines()[:40])
monitors = self._parse_metrics(raw_metrics)
up = sum(1 for monitor in monitors if monitor.status == "online")
down = sum(1 for monitor in monitors if monitor.status == "offline")
paused = sum(1 for monitor in monitors if monitor.status == "degraded")
normalized = UptimeKumaSnapshot(
source_status="online",
monitors_up=up,
monitors_down=down,
monitors_paused=paused,
total=len(monitors),
monitors=sorted(monitors, key=lambda monitor: monitor.name.lower()),
)
logger.info("uptime kuma normalized snapshot: %s", normalized.model_dump())
return normalized
async def _request_metrics_with_mode(
self,
mode: str,
*,
headers: dict[str, str] | None = None,
auth: tuple[str, str] | None = None,
) -> str | None:
if not self.base_url:
return None
url = f"{self.base_url}/metrics"
try:
async with httpx.AsyncClient(
timeout=self.settings.request_timeout_seconds,
trust_env=False,
) as client:
response = await client.request(
"GET",
url,
headers=headers,
auth=auth,
)
if response.status_code == 200 and response.text:
logger.info("uptime kuma metrics auth succeeded via %s", mode)
return response.text
if response.status_code in {401, 403}:
logger.warning("uptime kuma auth failed (401/403)")
else:
logger.info(
"uptime kuma metrics auth failed via %s with status %s",
mode,
response.status_code,
)
return None
except httpx.TimeoutException:
logger.warning("uptime kuma metrics request timed out via %s", mode)
except httpx.HTTPError as exc:
logger.warning("uptime kuma metrics request error via %s: %s", mode, exc)
return None
def _parse_metrics(self, payload: str) -> list[UptimeKumaMonitor]:
status_by_id: dict[str, UptimeKumaMonitor] = {}
for line in payload.splitlines():
parsed = self._parse_metric_line(line)
if parsed is None:
continue
metric_name, labels, raw_value = parsed
monitor_id = labels.get("monitor_id") or labels.get("id") or labels.get("monitor")
monitor_name = labels.get("monitor_name") or labels.get("name")
if not monitor_id or not monitor_name:
continue
monitor = status_by_id.setdefault(
monitor_id,
UptimeKumaMonitor(
id=monitor_id,
name=monitor_name,
status="offline",
monitor_type=labels.get("monitor_type") or labels.get("type"),
),
)
if metric_name == "monitor_status":
status_code = self._as_float(raw_value)
if status_code == 1:
monitor.status = "online"
elif status_code == 3:
monitor.status = "degraded"
else:
monitor.status = "offline"
elif metric_name == "monitor_response_time":
latency = self._as_float(raw_value)
monitor.latency_ms = int(latency) if latency >= 0 else None
return list(status_by_id.values())
@staticmethod
def _parse_metric_line(line: str) -> tuple[str, dict[str, str], str] | None:
if not line or line.startswith("#"):
return None
match = METRIC_LINE_RE.match(line.strip())
if not match:
return None
labels = {
key: value.encode("utf-8").decode("unicode_escape")
for key, value in LABEL_RE.findall(match.group("labels"))
}
return match.group("name"), labels, match.group("value")
@staticmethod
def _as_float(value: str) -> float:
try:
return float(value)
except ValueError:
return -1.0