Harden backup and posture checks

This commit is contained in:
2026-05-16 13:04:22 +02:00
parent 12a87ad342
commit 878ad2d5f1
25 changed files with 716 additions and 54 deletions
+3 -3
View File
@@ -43,6 +43,7 @@ The inclusion of `/local/secrets` is intentional: Borg is expected to cover disa
| Komodo | config + Mongo dump | `/local/borg-dumps`, `/local/appdata/komodo/periphery`, `/local/appdata/komodo/core` |
| Nextcloud | DB dump + file data | `/local/borg-dumps`, `/local/appdata/nextcloud/html`, `/local/nextcloud/data` |
| Grafana | SQLite dump + file data | `/local/borg-dumps`, `/local/appdata/grafana` |
| Filebrowser | SQLite dump + file data | `/local/borg-dumps`, `/local/appdata/filebrowser` |
| InfluxDB 3 Core | file data | `/local/appdata/influxdb3/data`, `/local/appdata/influxdb3/plugins` |
| Hermes Agent | file data + SSH key | `/local/appdata/hermes-agent/data`, `/local/secrets/hermes_runner_id_ed25519` |
| BentoPDF | rebuildable | no critical persistence in compose |
@@ -53,7 +54,7 @@ These are deviations from the standard "DB dump first, file path second" strateg
### Nextcloud
`pre-backup-dumps.sh` writes `nextcloud.dump` from `nextcloud-postgres`. Borg UI also mounts `/mnt/user/documents/nextcloud-data` read-only as `/local/nextcloud/data`, so database and user files are both inside scope after the Borg UI stack is recreated.
Option A is implemented: `pre-backup-dumps.sh` writes `nextcloud.dump` from `nextcloud-postgres`. Borg UI also mounts `/mnt/user/documents/nextcloud-data` read-only as `/local/nextcloud/data`, so database and user files are both inside scope after the Borg UI stack is recreated.
### Komodo Mongo dump
@@ -76,7 +77,7 @@ These are deviations from the standard "DB dump first, file path second" strateg
### Other Databases
- Komodo MongoDB
- SQLite: `gitea`, `vaultwarden`, `uptime-kuma`, `speedtest-tracker`, `borg-ui`, `grafana`
- SQLite: `gitea`, `vaultwarden`, `uptime-kuma`, `speedtest-tracker`, `filebrowser`, `borg-ui`, `grafana`
## Explicitly Not Backed Up as Raw Live DB Files
@@ -98,7 +99,6 @@ These are not part of the first-class Borg scope:
- uptime-kuma
- scrutiny metrics history
- dozzle, glances, speedtest
- filebrowser app state
## Suggested Retention
+41 -6
View File
@@ -94,6 +94,39 @@ dump_sqlite_file() {
atomic_write "$output" "$tmp"
}
# Create a consistent copy of a SQLite database that lives inside a container.
#
# The copy is produced with `sqlite3 .backup` inside the container, copied to
# the host with `docker cp`, validated with a host-side `PRAGMA quick_check`
# and finally handed to atomic_write. Both the container and the host therefore
# need a `sqlite3` binary.
#
# Globals:   TMP_DIR (read) - host directory for the intermediate file
# Arguments: $1 - container name
#            $2 - database path inside the container
#            $3 - final dump path on the host
# Returns:   0 when the dump was written or the container is missing (skipped),
#            non-zero when backup, copy or validation failed.
#
# NOTE(review): variables are not declared `local`, matching the neighbouring
# dump_optional_pg_db; confirm the script's shell before changing that.
dump_sqlite_container() {
  container="$1"
  db_path="$2"
  output="$3"

  if ! need_container "$container"; then
    warn "Skipping missing container: $container"
    return 0
  fi

  container_tmp="/tmp/$(basename "$output").bak"
  tmp="$TMP_DIR/$(basename "$output").tmp"

  log "Dumping SQLite database '$db_path' from $container"
  rm -f "$tmp"
  # Best effort: remove a leftover from an earlier interrupted run.
  docker exec "$container" rm -f "$container_tmp" >/dev/null 2>&1 || true

  if ! docker exec "$container" sqlite3 "$db_path" ".backup $container_tmp"; then
    warn "SQLite backup failed for $container:$db_path"
    docker exec "$container" rm -f "$container_tmp" >/dev/null 2>&1 || true
    rm -f "$tmp"
    return 1
  fi

  # A failed copy must stop here: sqlite3 would otherwise open a brand-new
  # empty database at $tmp and quick_check would still answer "ok".
  if ! docker cp "$container:$container_tmp" "$tmp"; then
    warn "Copying SQLite backup out of $container failed for $db_path"
    docker exec "$container" rm -f "$container_tmp" >/dev/null 2>&1 || true
    rm -f "$tmp"
    return 1
  fi
  docker exec "$container" rm -f "$container_tmp" >/dev/null 2>&1 || true

  # A zero-byte file passes quick_check as an empty database, so reject it
  # explicitly instead of publishing an empty dump.
  if [ ! -s "$tmp" ]; then
    warn "SQLite backup is empty for $container:$db_path"
    rm -f "$tmp"
    return 1
  fi

  if [ "$(sqlite3 "$tmp" 'PRAGMA quick_check;')" != "ok" ]; then
    warn "SQLite quick_check failed for $container:$db_path"
    rm -f "$tmp"
    return 1
  fi

  atomic_write "$output" "$tmp"
}
dump_optional_pg_db() {
container="$1"
password="$2"
@@ -196,12 +229,14 @@ main() {
warn "Skipping missing container: nextcloud-postgres"
fi
# SQLite databases. Use host-side sqlite3 so the dump does not depend on
# utility packages inside application images.
dump_sqlite_file "/mnt/user/services/gitea/data/gitea/gitea.db" "$LATEST_DIR/gitea.sqlite" "gitea"
dump_sqlite_file "/mnt/user/appdata/vaultwarden/db.sqlite3" "$LATEST_DIR/vaultwarden.sqlite" "vaultwarden"
dump_sqlite_file "/mnt/user/appdata/uptime-kuma/kuma.db" "$LATEST_DIR/uptime-kuma.sqlite" "uptime-kuma"
dump_sqlite_file "/mnt/user/appdata/speedtest-tracker/config/database.sqlite" "$LATEST_DIR/speedtest-tracker.sqlite" "speedtest-tracker"
# SQLite databases
dump_sqlite_container "gitea" "/data/gitea/gitea.db" "$LATEST_DIR/gitea.sqlite.dump"
dump_sqlite_container "vaultwarden" "/data/db.sqlite3" "$LATEST_DIR/vaultwarden.sqlite.dump"
dump_sqlite_container "uptime-kuma" "/app/data/kuma.db" "$LATEST_DIR/uptime-kuma.sqlite.dump"
dump_sqlite_container "speedtest-tracker" "/config/database.sqlite" "$LATEST_DIR/speedtest-tracker.sqlite.dump"
dump_sqlite_container "filebrowser" "/database/filebrowser.db" "$LATEST_DIR/filebrowser.sqlite.dump"
# Additional host-side SQLite dumps for admin tooling with appdata files.
dump_sqlite_file "/mnt/user/appdata/borg-ui/data/borg.db" "$LATEST_DIR/borg-ui.sqlite" "borg-ui"
dump_sqlite_file "/mnt/user/appdata/grafana/grafana.db" "$LATEST_DIR/grafana.sqlite" "grafana"
+38
View File
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Borg pre-backup hook configuration.
# Every setting below keeps a non-empty value already present in the
# environment and only falls back to the default otherwise.
set -euo pipefail

# Absolute directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Repository root: three levels above the script directory unless overridden.
if [ -z "${REPO_ROOT:-}" ]; then
  REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
fi

# Commands executed by the hook, plus the notification helper and its topic.
: "${POSTURE_CHECK:=$REPO_ROOT/services/posture-check/posture-check.sh}"
: "${FRESHNESS_CHECK:=$REPO_ROOT/ops/restore-tests/check-restore-freshness.sh}"
: "${PRE_BACKUP_DUMPS:=$SCRIPT_DIR/pre-backup-dumps.sh}"
: "${NTFY_SCRIPT:=$REPO_ROOT/ops/restore-tests/send-ntfy.sh}"
: "${NTFY_TOPIC:=kallilab-critical}"
# Send a high-priority notification about a failed pre-hook step.
# Globals:   NTFY_SCRIPT (read), NTFY_TOPIC (read)
# Arguments: $1 - name of the failed step
#            $2 - message body
# Returns:   always 0; a missing/non-executable helper or a failing helper
#            is ignored so notification problems never mask the real error.
notify_failure() {
  local failed_step="$1"
  local detail="$2"

  # Nothing to do when the notification helper is not available.
  [ -x "$NTFY_SCRIPT" ] || return 0

  "$NTFY_SCRIPT" "$NTFY_TOPIC" "Borg pre-hook failed: $failed_step" "$detail" high || true
}
# Run one pre-flight step and abort the whole hook when it fails.
# Arguments: $1  - step label used in log lines and notifications
#            $2+ - command (and its arguments) to execute
# Outputs:   progress lines on stdout; a failure line on stderr so the error
#            is visible even when no notification can be sent
# Exits:     with the command's exit code on failure (does not return);
#            returns 0 on success.
run_step() {
  local step="$1"
  local rc=0
  shift

  echo "[pre-borg] Running $step"
  # Capture the status instead of letting `set -e` terminate the script, so
  # the failure can be reported before exiting.
  "$@" || rc=$?

  if [ "$rc" -eq 0 ]; then
    echo "[pre-borg] OK: $step"
    return 0
  fi

  echo "[pre-borg] FAILED: $step (exit code $rc)" >&2
  notify_failure "$step" "Command failed with exit code $rc: $*"
  exit "$rc"
}
# Pre-flight sequence: each step must succeed before the next one starts,
# because run_step terminates the script on the first failure.
step_names=("posture-check" "pre-backup-dumps" "restore-freshness")
step_commands=("$POSTURE_CHECK" "$PRE_BACKUP_DUMPS" "$FRESHNESS_CHECK")

for step_index in "${!step_names[@]}"; do
  run_step "${step_names[step_index]}" "${step_commands[step_index]}"
done

echo "[pre-borg] All pre-flight checks passed"
+1 -1
View File
@@ -1,6 +1,6 @@
services:
code-server:
image: lscr.io/linuxserver/code-server:latest@sha256:4620adace18935dd6ca79d77e3bc1c379e21875392192f970cf5d6b0fb4aefcd
image: lscr.io/linuxserver/code-server:4.116.0@sha256:4620adace18935dd6ca79d77e3bc1c379e21875392192f970cf5d6b0fb4aefcd
container_name: code-server
restart: unless-stopped
security_opt:
+4 -2
View File
@@ -1,6 +1,6 @@
services:
filebrowser:
image: filebrowser/filebrowser:latest@sha256:4dce87308b9f9cfbcf8d0a284fc9565d2b515530a6bae2d920b388161e093f26
image: filebrowser/filebrowser:v2.63.2@sha256:4dce87308b9f9cfbcf8d0a284fc9565d2b515530a6bae2d920b388161e093f26
container_name: filebrowser
restart: unless-stopped
security_opt:
@@ -9,7 +9,9 @@ services:
- PUID=99
- PGID=100
volumes:
- /mnt/user/appdata:/srv/appdata
- /mnt/user/documents:/srv/documents
- /mnt/user/photos:/srv/photos
- /mnt/user/projekte:/srv/projekte
- /mnt/user/appdata/filebrowser/database:/database
- /mnt/user/appdata/filebrowser/config:/config
command: ["--database", "/database/filebrowser.db"]
+4
View File
@@ -252,6 +252,10 @@ function Test-ServicePolicies {
}
}
if ($service.Image -match ':[Ll]atest(?:[-@]|$)') {
Add-Finding -Findings $Findings -Severity 'warning' -Code 'IMAGE001' -Target $targetBase -Message 'Image uses a latest tag. Prefer a concrete version tag, even when a digest is present.'
}
$isDataService = $false
$identityText = ($service.ServiceName + ' ' + $service.ContainerName + ' ' + $service.Image).ToLowerInvariant()
foreach ($needle in @('postgres', 'redis', 'mongo', 'mysql', 'mariadb', 'influxdb')) {
+19 -3
View File
@@ -1,7 +1,7 @@
param(
[string]$DumpRoot = "/mnt/user/backups/borg/dumps/latest",
[string]$ReportRoot = "/mnt/user/backups/restore-reports",
[int]$MaxDumpAgeHours = 36,
[int]$MaxDumpAgeHours = 26,
[int]$MaxReportAgeDays = 45
)
@@ -9,7 +9,13 @@ $checks = @(
@{ Name = "postgresql17-paperless.dump"; Path = Join-Path $DumpRoot "postgresql17-paperless.dump" },
@{ Name = "postgresql17-mailarchiver.dump"; Path = Join-Path $DumpRoot "postgresql17-mailarchiver.dump" },
@{ Name = "mealie.dump"; Path = Join-Path $DumpRoot "mealie.dump" },
@{ Name = "immich.dump"; Path = Join-Path $DumpRoot "immich.dump" }
@{ Name = "immich.dump"; Path = Join-Path $DumpRoot "immich.dump" },
@{ Name = "nextcloud.dump"; Path = Join-Path $DumpRoot "nextcloud.dump" },
@{ Name = "gitea.sqlite.dump"; Path = Join-Path $DumpRoot "gitea.sqlite.dump" },
@{ Name = "vaultwarden.sqlite.dump"; Path = Join-Path $DumpRoot "vaultwarden.sqlite.dump" },
@{ Name = "uptime-kuma.sqlite.dump"; Path = Join-Path $DumpRoot "uptime-kuma.sqlite.dump" },
@{ Name = "speedtest-tracker.sqlite.dump"; Path = Join-Path $DumpRoot "speedtest-tracker.sqlite.dump" },
@{ Name = "filebrowser.sqlite.dump"; Path = Join-Path $DumpRoot "filebrowser.sqlite.dump" }
)
$reportChecks = @(
@@ -30,15 +36,25 @@ foreach ($check in $checks) {
}
$item = Get-Item $check.Path
if ($item.Length -le 0) {
$critical.Add("DUMP_EMPTY $($check.Name)")
continue
}
$ageHours = ($now - $item.LastWriteTime).TotalHours
if ($ageHours -gt $MaxDumpAgeHours) {
$warnings.Add(("DUMP_STALE {0} age={1:N1}h" -f $check.Name, $ageHours))
$critical.Add(("DUMP_STALE {0} age={1:N1}h" -f $check.Name, $ageHours))
} else {
$info.Add(("DUMP_OK {0} age={1:N1}h" -f $check.Name, $ageHours))
}
}
foreach ($check in $reportChecks) {
if (-not (Test-Path $ReportRoot)) {
$warnings.Add("REPORT_ROOT_MISSING $ReportRoot")
break
}
$latest = Get-ChildItem -Path $ReportRoot -Filter ([System.IO.Path]::GetFileName($check.Path)) -ErrorAction SilentlyContinue |
Sort-Object LastWriteTime -Descending |
Select-Object -First 1
+22 -3
View File
@@ -3,7 +3,7 @@ set -euo pipefail
DUMP_ROOT="${DUMP_ROOT:-/mnt/user/backups/borg/dumps/latest}"
REPORT_ROOT="${REPORT_ROOT:-/mnt/user/backups/restore-reports}"
MAX_DUMP_AGE_HOURS="${MAX_DUMP_AGE_HOURS:-36}"
MAX_DUMP_AGE_HOURS="${MAX_DUMP_AGE_HOURS:-26}"
MAX_REPORT_AGE_DAYS="${MAX_REPORT_AGE_DAYS:-45}"
now_epoch="$(date +%s)"
@@ -25,21 +25,40 @@ check_file_age_days() {
echo $(( (now_epoch - mtime) / 86400 ))
}
for dump in postgresql17-paperless.dump postgresql17-mailarchiver.dump mealie.dump immich.dump; do
for dump in \
postgresql17-paperless.dump \
postgresql17-mailarchiver.dump \
mealie.dump \
immich.dump \
nextcloud.dump \
gitea.sqlite.dump \
vaultwarden.sqlite.dump \
uptime-kuma.sqlite.dump \
speedtest-tracker.sqlite.dump \
filebrowser.sqlite.dump; do
path="$DUMP_ROOT/$dump"
if [ ! -f "$path" ]; then
critical+=("DUMP_MISSING $dump")
continue
fi
if [ ! -s "$path" ]; then
critical+=("DUMP_EMPTY $dump")
continue
fi
age="$(check_file_age_hours "$path")"
if [ "$age" -gt "$MAX_DUMP_AGE_HOURS" ]; then
warnings+=("DUMP_STALE $dump age=${age}h")
critical+=("DUMP_STALE $dump age=${age}h")
else
info+=("DUMP_OK $dump age=${age}h")
fi
done
for service in vaultwarden gitea paperless; do
if [ ! -d "$REPORT_ROOT" ]; then
warnings+=("REPORT_ROOT_MISSING $REPORT_ROOT")
break
fi
latest="$(find "$REPORT_ROOT" -maxdepth 1 -type f -name "$service-*.md" | sort | tail -n 1 || true)"
if [ -z "$latest" ]; then
warnings+=("REPORT_MISSING $service")
+15
View File
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Monthly restore test launcher configuration.
set -euo pipefail

# Absolute directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Notification topic and the whitespace-separated list of restore tests to
# choose from; non-empty values from the environment take precedence.
: "${TOPIC:=kallilab-info}"
: "${TESTS:=vaultwarden gitea paperless}"
# Print one randomly chosen entry from the whitespace-separated TESTS list.
# Globals:   TESTS (read)
# Outputs:   the selected entry on stdout
# Returns:   0 on success, 1 (with a message on stderr) when TESTS contains
#            no entries, so callers never proceed with an empty selection.
pick_random() {
  local -a candidates=()

  # Split on whitespace without pathname expansion; `read -d ''` consumes the
  # whole value (including embedded newlines) and reports EOF as non-zero.
  read -r -d '' -a candidates <<<"$TESTS" || true

  if [ "${#candidates[@]}" -eq 0 ]; then
    echo "pick_random: TESTS is empty, nothing to select" >&2
    return 1
  fi

  printf '%s\n' "${candidates[RANDOM % ${#candidates[@]}]}"
}
# Choose this run's restore test, announce it, then replace this process
# with the notifying job runner located next to this script.
restore_runner="$SCRIPT_DIR/run-restore-job-with-ntfy.sh"
selected="$(pick_random)"

echo "Selected monthly restore test: $selected"
exec "$restore_runner" "$selected" "$TOPIC"
+1 -1
View File
@@ -1,6 +1,6 @@
services:
speedtest-tracker:
image: lscr.io/linuxserver/speedtest-tracker:latest@sha256:eb3d249f16177964daa4fff7f6a90bbf6645f4e23158d92f5cddb133728d0804
image: lscr.io/linuxserver/speedtest-tracker:1.13.12@sha256:eb3d249f16177964daa4fff7f6a90bbf6645f4e23158d92f5cddb133728d0804
container_name: speedtest-tracker
restart: unless-stopped
security_opt: