monitoring + backup: Stale-Handle-Hardening und Dead-Man's-Switch
Schliesst den lokalen Code-Stand fuer zwei offene MASTER_TODO-Punkte ab. monitoring: restliche Einzeldatei-Bind-Mounts (alertmanager, blackbox, loki, promtail, alertmanager-ntfy-bridge) auf Directory-Mounts umgestellt, analog zum Prometheus-Fix vom 2026-06-19. Vermeidet "Stale NFS file handle" auf dem /mnt/user-FUSE-Share bei git/Komodo-Updates. grafana-provisioning war bereits Directory-Mount. `docker compose config` gruen. Beim Deploy --force-recreate noetig, da sich Mount-Zielpfade aendern. backup: endpoint-agnostischer Dead-Man's-Switch (Healthchecks-kompatibel, Cloud oder self-hosted) in pull-critical-backups.ps1 und pre-borg.sh. Pings /start, Erfolg und /fail; No-Op ohne konfigurierte URL, bricht also keinen Lauf. Ping-URLs sind Capability-URLs und bleiben als Secret ausserhalb des Repos. Doku: SECRETS_MAP, Nearline-README und MASTER_TODO nachgezogen. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,35 @@
|
||||
# Script parameters:
#   -SourceRoot       Root of the backup source; defaults to the UNC share
#                     \\192.168.178.58\backups.
#   -DestinationRoot  Root of the local copy target; defaults to
#                     H:\kallilab-nearline-backups.
#   -HealthchecksUrl  Base ping URL of the external dead-man's switch; defaults
#                     to the HEALTHCHECKS_NEARLINE_URL environment variable.
#                     It is a capability URL, so treat it like a secret.
#   -WhatIf           Switch evaluated by the `if ($WhatIf)` branch further
#                     down, which ends in `exit 0`.
#                     NOTE(review): presumably a dry run - confirm against
#                     the part of that branch not shown here.
param(
|
||||
[string]$SourceRoot = "\\192.168.178.58\backups",
|
||||
[string]$DestinationRoot = "H:\kallilab-nearline-backups",
|
||||
[string]$HealthchecksUrl = $env:HEALTHCHECKS_NEARLINE_URL,
|
||||
[switch]$WhatIf
|
||||
)
|
||||
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
# Externer Dead-Man's-Switch (endpoint-agnostisch: Healthchecks.io-Cloud oder
|
||||
# self-hosted). Bleibt der Erfolgs-Ping aus, alarmiert der externe Dienst von
|
||||
# aussen - genau den Fall, den Prometheus auf Unraid fuer den baerchen-Pull
|
||||
# nicht sieht. Die Ping-URL ist eine Capability-URL -> wie ein Secret behandeln,
|
||||
# niemals ins Repo. Quelle: -HealthchecksUrl -> $env:HEALTHCHECKS_NEARLINE_URL
|
||||
# -> Datei im Userprofil. Ist keine URL gesetzt, ist der Switch ein No-Op.
|
||||
# Last-resort source for the ping URL: a per-user file outside the repository.
# Only consulted when neither -HealthchecksUrl nor
# $env:HEALTHCHECKS_NEARLINE_URL supplied a value. Resolving the URL is
# best-effort: a missing, empty or unreadable file leaves the switch disabled
# instead of aborting the backup run.
if (-not $HealthchecksUrl) {
    $hcUrlFile = Join-Path $HOME ".kallilab\healthchecks-nearline-url.txt"
    # -PathType Leaf: a directory of the same name must not be read as a file.
    if (Test-Path -LiteralPath $hcUrlFile -PathType Leaf) {
        try {
            # Get-Content -Raw can return $null for an empty file; the [string]
            # cast turns that into "" so .Trim() cannot throw.
            $HealthchecksUrl = ([string](Get-Content -LiteralPath $hcUrlFile -Raw -ErrorAction Stop)).Trim()
        } catch {
            # The script runs with $ErrorActionPreference = "Stop"; without this
            # catch a read error on the optional URL file would end the run.
            Write-Warning "Healthchecks URL file '$hcUrlFile' could not be read: $($_.Exception.Message)"
        }
    }
}
|
||||
|
||||
function Send-HealthcheckPing {
    <#
    .SYNOPSIS
    Sends a best-effort HTTP GET ping to the configured dead-man's-switch URL.

    .PARAMETER Suffix
    Path appended to the base URL, e.g. "/start" or "/fail". Defaults to an
    empty string, which pings the base URL itself.

    .NOTES
    Reads $HealthchecksUrl from the enclosing script scope and returns
    immediately when it is empty. A failed request is reported with
    Write-Warning and never rethrown, so a ping problem cannot fail the caller.
    #>
    param([string]$Suffix = "")

    if (-not $HealthchecksUrl) { return }

    # A base URL stored with a trailing slash would otherwise yield
    # ".../<id>//start". The slash is only trimmed when a suffix is appended,
    # so the plain ping keeps using the URL exactly as configured.
    $pingUri = if ($Suffix) {
        "{0}{1}" -f $HealthchecksUrl.TrimEnd('/'), $Suffix
    } else {
        $HealthchecksUrl
    }

    try {
        Invoke-RestMethod -Uri $pingUri -Method Get -TimeoutSec 15 | Out-Null
    } catch {
        Write-Warning "Healthchecks ping ('$Suffix') failed: $($_.Exception.Message)"
    }
}
|
||||
|
||||
$Jobs = @(
|
||||
@{
|
||||
Name = "borg-dumps-latest"
|
||||
@@ -145,44 +169,54 @@ if ($WhatIf) {
|
||||
exit 0
|
||||
}
|
||||
|
||||
$destinationDrive = Split-Path -Qualifier $DestinationRoot
|
||||
Assert-PathExists -Path $destinationDrive -Label "Destination drive"
|
||||
# Echter Lauf -> Dead-Man's-Switch aktiv. /start misst die Laufzeit, /fail
|
||||
# meldet einen abgebrochenen Lauf sofort, der Erfolgs-Ping am Ende bestaetigt.
|
||||
Send-HealthcheckPing "/start"
|
||||
try {
|
||||
$destinationDrive = Split-Path -Qualifier $DestinationRoot
|
||||
Assert-PathExists -Path $destinationDrive -Label "Destination drive"
|
||||
|
||||
$logRoot = Join-Path $DestinationRoot "_logs"
|
||||
$reportRoot = Join-Path $DestinationRoot "_reports"
|
||||
New-Item -ItemType Directory -Force -Path $DestinationRoot, $logRoot, $reportRoot | Out-Null
|
||||
$logRoot = Join-Path $DestinationRoot "_logs"
|
||||
$reportRoot = Join-Path $DestinationRoot "_reports"
|
||||
New-Item -ItemType Directory -Force -Path $DestinationRoot, $logRoot, $reportRoot | Out-Null
|
||||
|
||||
$results = foreach ($job in $Jobs) {
|
||||
Invoke-RobocopyJob -Job $job -LogRoot $logRoot
|
||||
$results = foreach ($job in $Jobs) {
|
||||
Invoke-RobocopyJob -Job $job -LogRoot $logRoot
|
||||
}
|
||||
|
||||
$reportPath = Join-Path $reportRoot ("nearline-pull-{0}.md" -f (Get-Date -Format "yyyy-MM-dd-HHmmss"))
|
||||
|
||||
$lines = @()
|
||||
$lines += "# H:/ Nearline Pull Report - $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')"
|
||||
$lines += ""
|
||||
$lines += "- Source root: ``$SourceRoot``"
|
||||
$lines += "- Destination root: ``$DestinationRoot``"
|
||||
$lines += "- Mode: non-destructive copy, no ``/MIR``, no purge"
|
||||
$lines += ""
|
||||
$lines += "| Job | Exit code | Source | Destination | Log |"
|
||||
$lines += "|---|---:|---|---|---|"
|
||||
foreach ($result in $results) {
|
||||
$lines += "| $($result.Name) | $($result.ExitCode) | ``$($result.Source)`` | ``$($result.Destination)`` | ``$($result.Log)`` |"
|
||||
}
|
||||
$lines += ""
|
||||
$lines += "Expected critical artifacts after run:"
|
||||
$lines += ""
|
||||
$lines += "- ``borg-dumps/latest/immich.dump``"
|
||||
$lines += "- ``borg-dumps/latest/komodo-mongo.archive.gz``"
|
||||
$lines += "- ``git-bundles/gitea/latest-report.md``"
|
||||
$lines += "- ``git-bundles/gitea/micha/*.bundle``"
|
||||
$lines += ""
|
||||
$lines += "Bewusst NICHT in Nearline-Scope:"
|
||||
$lines += ""
|
||||
$lines += "- ``unraid-flash-config.tar.gz`` (hostseitig 0600 root:root; Restore aus Hetzner-Borg)"
|
||||
|
||||
$lines | Set-Content -LiteralPath $reportPath -Encoding UTF8
|
||||
|
||||
Write-Host "H:/ nearline pull completed."
|
||||
Write-Host "Report: $reportPath"
|
||||
|
||||
Send-HealthcheckPing
|
||||
} catch {
|
||||
Send-HealthcheckPing "/fail"
|
||||
throw
|
||||
}
|
||||
|
||||
$reportPath = Join-Path $reportRoot ("nearline-pull-{0}.md" -f (Get-Date -Format "yyyy-MM-dd-HHmmss"))
|
||||
|
||||
$lines = @()
|
||||
$lines += "# H:/ Nearline Pull Report - $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')"
|
||||
$lines += ""
|
||||
$lines += "- Source root: ``$SourceRoot``"
|
||||
$lines += "- Destination root: ``$DestinationRoot``"
|
||||
$lines += "- Mode: non-destructive copy, no ``/MIR``, no purge"
|
||||
$lines += ""
|
||||
$lines += "| Job | Exit code | Source | Destination | Log |"
|
||||
$lines += "|---|---:|---|---|---|"
|
||||
foreach ($result in $results) {
|
||||
$lines += "| $($result.Name) | $($result.ExitCode) | ``$($result.Source)`` | ``$($result.Destination)`` | ``$($result.Log)`` |"
|
||||
}
|
||||
$lines += ""
|
||||
$lines += "Expected critical artifacts after run:"
|
||||
$lines += ""
|
||||
$lines += "- ``borg-dumps/latest/immich.dump``"
|
||||
$lines += "- ``borg-dumps/latest/komodo-mongo.archive.gz``"
|
||||
$lines += "- ``git-bundles/gitea/latest-report.md``"
|
||||
$lines += "- ``git-bundles/gitea/micha/*.bundle``"
|
||||
$lines += ""
|
||||
$lines += "Bewusst NICHT in Nearline-Scope:"
|
||||
$lines += ""
|
||||
$lines += "- ``unraid-flash-config.tar.gz`` (hostseitig 0600 root:root; Restore aus Hetzner-Borg)"
|
||||
|
||||
$lines | Set-Content -LiteralPath $reportPath -Encoding UTF8
|
||||
|
||||
Write-Host "H:/ nearline pull completed."
|
||||
Write-Host "Report: $reportPath"
|
||||
|
||||
Reference in New Issue
Block a user