diff --git a/Homelab_Audit_2026-05-05.pdf b/Homelab_Audit_2026-05-05.pdf new file mode 100644 index 0000000..f92edab --- /dev/null +++ b/Homelab_Audit_2026-05-05.pdf @@ -0,0 +1,194 @@ +%PDF-1.4 +% ReportLab Generated PDF document (opensource) +1 0 obj +<< +/F1 2 0 R /F2 3 0 R /F3 6 0 R +>> +endobj +2 0 obj +<< +/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font +>> +endobj +3 0 obj +<< +/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font +>> +endobj +4 0 obj +<< +/Contents 15 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +5 0 obj +<< +/Contents 16 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +6 0 obj +<< +/BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font +>> +endobj +7 0 obj +<< +/Contents 17 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +8 0 obj +<< +/Contents 18 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +9 0 obj +<< +/Contents 19 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +10 0 obj +<< +/Contents 20 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +11 0 obj +<< +/Contents 21 0 R /MediaBox [ 0 0 595.2756 841.8898 ] 
/Parent 14 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +12 0 obj +<< +/PageMode /UseNone /Pages 14 0 R /Type /Catalog +>> +endobj +13 0 obj +<< +/Author (Claude Sonnet - Read-Only Audit) /CreationDate (D:20260505184207+00'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260505184207+00'00') /Producer (ReportLab PDF Library - \(opensource\)) + /Subject (KalliLab CORE GitOps & Design Review) /Title (Homelab Audit 2026-05-05) /Trapped /False +>> +endobj +14 0 obj +<< +/Count 7 /Kids [ 4 0 R 5 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R ] /Type /Pages +>> +endobj +15 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 2097 +>> +stream +Gau0D?#SK;&q0MXR&@eM6CQ)Y-0_n7c:=A":+[7c/cl`O30H1o89me\Y]50j4\LnOR7a`2j]I9!jmLEa3BK#Y6U;2GUA6lT'>;,WZaYO>9@n4t^?sDB7s]6)EcY[t8&SkBU">9Pi3Q_H'40>YGBOoKan)>^gPr=,TcFu>imo56nB;F9hpVEZiik_UZingCYo=(.mFW/VV!Bn/IRpbJDV/brKXO82VNm0"^lY-3taP^#8Tk`0/1l;1UbAu7cW0+Qe^_bh8h9jg)=DuRa6,^2._GQ^H+RtW#KI@N#'gN:ZG9gkit_,^E]EK-_^!.Fh&sc-bA56/TSG^>K(e^JE-G-3.$qX!GUK2NOue3',O5?YKmI:8H5O.XP!#&J0_+:R:l8=oY)`SF)%A#[oi`Eqf;OGXqR(LH1;$=,.*%GbC:M.B89n8GSEP3^Yn6/mml/Y^9]]R_Vb_Df<<58i?C;aT\AcRqMoQ]m/_)R)EY2Wl&V[qUh%c?NSqR@5_,gsiF'$!cSQ3lt?dE)i2=Ws*8Y4L(J*9?k"A(Q-sd[OtPTTJi4\8X6u;V]/pc\L^aCF-aM8'^IE0pTK5k_@(=4[9GKjqjqJ'8$-nV2_k<+_Q]tSCHN\`rTZ8oAoYRWY@.)3rX[Q/hCW[<^/K;c4SFS!Wmcc6R:1\!X9K@O(Zcma=bD\=(idpD!*o>ks;KF2"EGc8s"FIU#GbM"Hj5?)$bu*(QlaoPdCt,PnC+RB>PM,LZBs_N@&bZOiDg5P&>4\bg"<`Tt*%T9:+W&WNQe%"".:`VZ$%D7H>r;SCB_B>0rc-?^]$#2,c1&7V-ZqCfOj9X&.9jm_8H=]PY4%PBiZa1i9U=E\p/j@aKO*J_GIq*mhl]/>Rl(70rHlX1P`JHIr;T9m\(B'5A?jah7jLTO%35b@H`_T^Y.rT4O^.YX2RU#'[&m$r?&Kjo1t-6(XH4,jU>0*WHD+iD?M4Uc-Di71XEL&/SRV,F@Rmk9?Kp,UjlreF2/9F:\fF?AIOmWkW#)2[E9'i4Ho:Hb>frEn2E0)U*JFGYdMUe5h+*XpY.)&+lE'_[O/VBS7S`JWM)bEX0E&+n5G]Z"%"mM>ds<_9`]RQ4CV=&>h1*_SbgEK]t2E"[Ag%&7VTrfH(-TT/%n:$19+X7/hE='mW9^'CU3*+N!*L!Vc,."64Fjr7fNj6f3j3^%o)E9P`.:$[sMNGb3@fDPhfMj8_t9RTQr$F'[jOR/WX4gA#3bYnY1rNMfX01)\@%Y"aCop'Z1A[XKf
dXLY9:[U&0*.SFE2:9@0\cZ=2;BM#ttLJIgRls+?t~>endstream +endobj +16 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 1616 +>> +stream +Gatn%l#59H'YqKHYB]?1[)*<]Yq]Bq-,PeM3QN*tlE2V1!M7Oeiu'm!QR:ENIc"O"U$7])8e-6m]]8ahnXrHo:+La7"6aqglAER20af`J!1$C^.Rm1&qR9WWE@[G,kd*i&CVUBnl9W)Ua*XI:Lgph##6M6VSV)+,'WuV?q9qi#`7$$'").0g)f#AJeUq(?m.*3c<"4r7\R^ftnV"nZ_W^?b!3p,e-n#6j`BcQ%mE"Ja%Y^0V9U[rXN`Yqecs%gQVj5qEQG,[B1K'Ybp+[G-'Z*fCV+dGOQV3K8r_3>?Z*-`p**cOfnqTk?I'!lg<8%QjuX<"6Y[^K3H,]6(otKI(jF4Pq(9M_8NSR$3n!6^dJJ:[AjRuL81Ud0Vl5)k^Mna^s0FK\Ps8q:BZB5TNpt+._1(pdMc3,4[SdX8>5e$+SBqB0uK8-agDPLECpG!PYujSdnQ%RqZc[_PO9\:,h$;N)rD0h^D_&O9#l.>Bg3IEdqSC@p0p.uC-Zu"m>Z&5_>CUEV_]29pS6SQg?YWllg\NgDFTsa2][lrVfYSn_lC"1S`<2u;VGeL4;RG[pbaEW`AEUL?$AC!&86Ye9M#/XQE7C=oK'?WA"9)*@/]*C.Id"Sa]jsX4q0q_N02IBq(9nuqFTXdnLikt)W>hOa7<-l:&nPR^5(D$l4c7^D]aj#'93e'Aq6rcDQ5];d9oD;Gud8_:?69RTUWYtWXMm^?u6DASM4>RieK7`qh;TY>).pt0-9[V`MW?lu7+V^>Cf-]cVM-Qh#'4P)PK?$Sc3+(cq69)7:+?Q0.4;Wn-;PUiI_uS@.LO\3M6XDTF'D(o9o["OZ5'a(QO=Dj2d7EJ&RK1eLomS17"2CdD:@HfpmLoH+VMgMXYOi6brbd%Wa<`.W-s[qN-NVeF!OmASS/+M$9eQi^Gga,T+S9K*#"hc8SV.eoUfj&+9Y^s]]j/Ta#]6l,acKQ+"dV3*H&7BnN/b7,34cNE!t+Wc+pcs_#u#Mr*7!E*D\W>2*_A@U\\9`bEeJs9@!BO3MDTl"/8X+,\SD.j(de4Z`#'P`=s8s8IGQeMj.>H?1KAsXTnf622.Ai,/6-'7(`cq[PEP\BB7HRW\%O40r*oPYQ&hoCcbZ^EA!J2V9s,HI\#>DP$='Ao2j1Eb)pblmU*WJr-=HR!LD["Gk=F/o)F?cEc5_>d"Uif+kb2diV\XqScrl(l1;`:bRfT%G*(:0S@#=EC88G\7m/&P*u/;!7\pNBS"MF9GccJXj(@K+AMQKp24_lJT+U(m-@s]-iX57.N;C~>endstream +endobj +17 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 2759 +>> +stream 
+GauHLCNJ5g(B*Z.EMq7rDT8LJIUm&71a>%#e*2QXVFBO49Lj;?r14,QUYh)R,Od8A-/Gpp#P5OgfV\%70fp"M*142&3jqb#kj"9$a9m%DnJmSs:@Xeu[Bi<9m`MZLG$j(5jUZ2l4o(jr`<*BRcLl94ZpKrXbLY48j,B#%=dg]+fn@XhLBGug1=(3&@BY]SMCc[91rI;I+)(R&NSd9LXa?ZXdFDdEHJ>j922uiYEO%YFm>6SfH5QWh_-pbk$jV)e>cs>pNEJ:?S-"&eQF!JD,!p%to9)t_Yj\GHW_A"OLm5\[t)"(1".gD%i0(I!u\[7>&6#BS#sf@10GLLRRV?ViI[d,G\!c\j*ScL>c:(ZAKq.:M2ElaN*fi>WLls&]V%iY9]m-bG`n1sHo`o2luuO/l04:"I$s+mC\NlG+He+(B6:uQ.2K7H:;')M/Q59aMBY0pbje+%[BOR1YBfeGS^<6A.7L>V_^U4%:0/U$k[2QCi/FiK)6"oZl`[HY?R%nMf@mnStHqgRb'e!]0'OAc$35b[taDgF$M5^_#i+jjLW*aYjkGc;/JhSOOD/pWcgtt@QX&j_MC6"Pflj;UBYH-X)GKRbG%?s5Jc:Z$TNEB3]^!QW8pK`lq7:@EeS`=jC$`aK?3_t4$'u:pX*,I&Q-MCHc!+OA]nY1.Mr!t@G,>elu*T(QQU^.j>XSlbEY8iprS5$mtq!U^,EI"&I9\E/D",AhQ:Q,=r\X=@E&AfQqAHaE8bODhFY7RDl^2V=G5W0mqH2Hs).B!BZnS#.6oVOCe?J!g^bC:f5LbcV3THLk8R8GbL1Jj71i>3-mU=;`Vq/]kA5(k3p`/DFhX(Q?DVU'Y=\3.cHV3XrBkXBXH[hJS,<7'Rf9c#NL=7B)jjSAMjM'Y@i2r6mDqSjEBra,k`U2U=:'mPr?*FiHk8TK'jjADD@Q4XdZ(Dc&D.b%=p(A6ni7%saGYYk"p-TNo:[nd5&]lpO-t7Q9A>"0Ct@oM,Gbs,%`JIFjZ`T-FLcMKsS2^=.ga#2g58!gu0E4'%ZJ]Vt>4jKa'/9LQsjoFE"6:3.!pZW!P;T<]rg7B*1FmXJ/f;W?Q#Wb^DuBRd!o72!We3_bKi(ppV1(dJGDOLUF",1Gk;"sR7%!nO#aj[)_:.1:Z*^+?g]_\N$G"ECJ4Veaf`nlQ[V-&P6:b&Y?D.(@4ZPF0]@#6Ie,`8#'eEsKWVg"X33>Fi46*sOAt))ja)'N-$>;Ko/nghr!!k,B<".$IkaGc;3O$;1@ap;`d"dEW9Y9=RL8.q[9Bp_OgYuF\)FXq#UCbI]#F6<'g7XtA-#_++8R@/7"%aV99Yi'p8IO%UEBW-r'PDSha]E^"g&N]TPbD:8Qe1&?ph+6r5eF1=)FT(M'@_q#q&ZpKOB9a5^t?$L%Nait^R7$&]`~>endstream +endobj +18 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 2447 +>> +stream 
+GatmBa)M8[;;5BPp6$n:V`0%M<_FPSWCU4`kKPFkV:c2i(CfPKAZp3#EBHr9bs0jf**4Q&0G`JL)VJt>co+E["B_joS5@Tb$qfn_7&#$9m40.Q-&IdL49ckL^jicm3Wr^ap[2#PH8f>.85M!^CR6m'FbmR^2ZBMQt+7XMH"I6S0jMr:fr6sbOf&R5'+DQCL]s&=N*LBh0o,D)2"ZE]KS12p7a^/i6btBK9B-YBQPY-L.(PPJ^\J?i#UNt7gEp#"6j`i]HY)j=YdZ('#X246`QWMM"rF,a"i?PV/>_@M*:9*iCU_qFbbo&1B!dj@&0#XCN-tgM(n3W`[)rV2_PFP`dSj;8FOBb[9Ur[O(cH4V,4%!iC>R?6,gG5S;BM[A7M+aJOPj9M'\]D"bg]56XT2dV%tT%e,Jl9e'7/G^*=N4[mgJeCddTuaI%6lhm(*udZ2'IA"2g[!jcN1R`u6/:3Yi*4rb;\4[=+AQC&F"_cALaf5XqP;9(CB5>m\cu5hSq(s]R/``sRP;@mbU1DM)B?m<92S'n$S"cXrX"E610m:-FN,L?TG;B#uUc=p)$Wj8_:o[?'b$WH:#1@c$H%P0.QXWpZBF#.s.AUZ1hYb_2[i_;-N*@BGuIBm4DCqn>5@d,!moCVb0VF2sq)I.Q@a)VT/p"%nSEVL0P_1cilR7\_V27S>5!RoY*Q8DZg%`kO):l,VkTN;irTiVpKO'X!n-1W$0SdV-IZ>PGYX=T\F+e)ML?b'f0:[*(g=2a5O^,,igo1bE-(OUPfKp[[npE+o7NqQif\js)rS%Oc$.<%<(#%nn["P@f<)#qNR`Sf)q4hr.3dS-"I@dJVS*nb700GSgCAeif5ANm4$,$]URjK3PMCH7AiiI3$;EPP7U=-!m%mlp/aiFWME2st9ASM6CC7(gU&Bt]f'_g(82Z1u-+NlIh(Ggr2/^![0#A*VP!nBl2eCQpnd1C8mYnRY]%.pY_`r)]\ZSF0p"5>&]Bq*%FNX)@iRQt;7b?&uB>/0XY05ENk<+G)C'o9"`DF:JK+I9N;Z_h[42ca\SE]jCe%GAbs\-K:J*R3Ck-ei^CP>Q',Aq'aL8fMg\@lF_Zm9s1SpR-$;:R%b+"LS&Be3h?0j5J-1m~>endstream +endobj +19 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 1691 +>> +stream 
+Gau0DgN)%,&:N/3m,Xk(Ak\!\8<@q+3ht>AOuc?S99Y.F_'19^i^MhF:,DkD/gq;W>rddBg%O')/\$-;(4-%5JrSZpq)qLS?I*$W#6i7X('/Q6"1&O-AiBKH,.9.lk5q>.E6qCJ_^Y>Q=H#"_fVb7L%d2aS0#;N/Qqt4:*!(bFkgcVgbePI+%NTBG&sqo;0RnfUC80\iFGnEZ6j,'6i%n,sR'RK5?H=CK5KI%gqfME6/9mMZ.RA,RLN*LAB#:iZr:q(>?XWS"Vq]XH!2?*B,'r[_@A^#].g>l:HY11pnML.K_9Yi.@lp%o_$))lP\mY\-lH3\ZK1I+msj8*?,2G'kLMIcP4*NraXldE^W'`k&I$oaXChq#F5+T$T\YFl*XDY#4C-iW%e?rO^lTrq>H+W1+Jg1?Vqgjet!`KNB1P,&l#p'I9jD+kL'_Sc*]NL=_ce$@dWedGFH`RRUMCK5R`XD_5Wm4Y1)DE0siPbNt\h#AQQ^L1N$fge:36tY`aD@R$&m]prVQSKae5b"@P^oIallk2i0dn[FXGk=pA_"VUW-HC^dHn2%MrFo]UkI@*`6XQOLpU((V$6>_U^W?C6l(m,@eCkiPB\7FD)f3LYN8*10Cgq1F(fi&PHteN3@]9-2(V^sTm.ZM<aZWdq6Q@b_l3`S@Tnh@Q]ZQ.)ijl[O]Desgd3DXRK?TaM$EO8ujlnfZYu`aJQ?TjuM"REN^,?c`'Y]N^&"B2\[j/cKhHX0Yh.rT?osIJ;>I/J)K9,EV^tt/C!pT$1^'dZqikq[=QUbCgoNc.=/q%6WF(s+5`.3n,NQW3Td&(ff!=WhR/q)3qF(s+5]E&FX)OEO9ThXS,!uT`K/inNVc.:te;"u%:0u+nm#gNF$e!l-O1plL-DP,/Tdhn%kf(&UARl]^/KJVTQl`oafN;a"A"#*F~>endstream +endobj +20 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 2670 +>> +stream 
+Gau0DD,]4L')p1[khtW;dYO^tTFWS.QkjKF&2o[ip,0PORM>"5WHPdrRc%AmjZ3DV>l24:o[@"[O=-q*7M0_:'DR3b[U>N[;H5qsDNZ,o&A#`]D/jh7sF)pnjl!MSnI(AgNh!l"Ys*hle1#'uHT>]#MER7LV-d[,,iIZ)0Nm,mAK8BEoHq[pf&i"j_h;2Jgm$`W^Zr'!o#`7\a(To(9Zdqu?/Z&hp)M>asnjqG7o<`8cF5+7V$hM6>M3lPZkn>D@o!+K*Vu4'_D2o[E>Ydmj?G.ko@HbB%N]_!G@eB*Pd`ke8/63(RJ86=en)!2b[#(30)Q50W='jjiK)*37!=>!\nd;n=;a):#-mdoeYlA#PZpZfG?9p24t-068<#>*>P\rKjh/MK8*K/SS&tOAo1%q33CuZKO181:`0ta)IN+V,f=,WWN!0abitM=oaU!Ws;*k_CeB*m?lA(=KI7]pnh[c#`D[rYjpLi,ei*oA0Od8Q$>h&g;]F3"KN>$^C%/%aCBllJt[V699q7ONKAa7c4dXf&/9]fp#XO_4,+1sQtTP59>A-n!DfofMNoJ^pAtiqFT[Z3?Z8'B)>^4!@^0BLWAs1id&pIjVt;UYe>-OknH[!#5o?F%Y,[Zkid>$CPa;``>l]".pBZjtZiJbb:b?of&9X_nWQ6bj(l-dT*b',1F*/JJ0)giuP]2L6aCpuMmcB%8+"NeWoJOkW6I<$R5I,Z2[1K6[LJXmhNsLl.-_5Z1e;=1diq1L-Mc9"*&i&L,04DR,N0e[A]b=6Un`\4$:X\P&e$k-6Kk8-K:W?1\Z'/##.GkS0VepqA"t9,TU1%X0q^,kbZ7WpqCZ)uh>dSI+6>#k^>,!eTpNI@gE@5$C/E*;''aJ!EPq`_HV0Ld-'K8"q(:C*+(hf-a)Kulf&0I;M1'R"Q8.3*M0fYe5sE7dlf,CedYRj'(5jKP3G7ooDu&am^E[l;,E=^]tdB?._E'1#I8Kf#M%]>r"`c2&jMF>mmr5F"-^fDIJX)`dXu/JhsuA-Gs_`gHbm3ikr6pK;X')/43EH:%i2$*ndsR63q`0]#^iCci6>[p/4Xilq`)"2KOJKe$Lo?Me4N35"=j)?+7GIbJ_[GmL5*:s^'"T3[&(k9C3hqSq)iqL5+Q$:bU4dE8bgV]r3,q/*oC3trptn35[(#f9.].bB'@/icli\`i:%FB8A'AR/&!V:MZR:SHL&F[8i.DP4AU51jsqGF=6(6:>Xl30md"*'%Su1Lb4l)4d4eq/YfSIsZ`G5=1hh)XK[(;&4U]m2f+=]X=Q:I<-G5:+[90I5FB/44>0_pVFKd``[:uE%fMJcK-j\*&#U6:-;kGj#B4C%&lEN*?ECJer#!X240W3R^A/0?qF[R>SfifD9!9\MP6gHZjDS7t3KchiFNq7N8JP)E!Iq1ACXP?_W2=1=V%Ig[&)J#M::>VsP\8\ReO;nC=iUQI`78t@92rt7tVSq@O9H%KTSQ;M#\5)Q[)ISMV$oD;C_nWZVteIG9!P+]fA&[/XBTq@e9?Ms4;qh_Fn?YZE2YJ(c74NXp(?;U#G.Oi7]<6.1ar6k?37p&86eu3gl+`0))L"*M=S-/G,N8j)Z\rfRZ.bUD6O$U.S%`I`BLpO:;+$;@Oo2\N_0WKTXI"+7CRu^SEd8WMZe<`%Jg5jamK<@@qIomIC_D+gmf@>a7#qp<%&d5:endstream +endobj +21 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 1010 +>> +stream 
+Gatm8?$"IS'Re<2\AM6RY\Hc/jK0`a['[W3Qs#0c%&:bmM85:dZJ;b0rqO0Mghpf:^r&3.A3&)KkFQau3/$q__sc:ZS/-)A)[A!gW.7AhK&R-_MTMJ((1rCo@piR&&cL$D,e6UbD'kQ,B3_`%OqQb:B!a/kOYI_";$%6$H8ZY&"f*^;U%8k_`Nm8:ISBTR5c"p;A/?UEa[OJu'9WflKn6!lXl/.HIA)4K*XmYKJD:YU%f&8ZTd03h?+DfOZ9V4Z1pN2!jtNhpOI^e7egI2%N,,4$rIj=>U#N@>]?t.2FgaODf,I)R.oM-VK4H3lR#^jFoAFX]s?`@oljs/*F^3^@?XA=.DqC-T_Fn14ARs74(b21`/U*+A<]fmn-(EYnbQ,TjkbM`?$6%dn"b(-2`:XM8R.frYMG.k"8[OjrG:4UA28E5%'*a]JVq\k0.QALEfn"l]bOD,?(r'2L;P=UWFM*[jdOoc`8np!ijs)5WqEJ(YrUkdeL9ZlVE+s5q-GX"0VC2lbH*7^l8*FR+Fq3$E`[>Y*-IOo8ZRaKfddW>rf-Xa)d4)=6eeD!Ym=;3J7:b;5U:YkTkDE>n3Hb-@uhN!Hd[rMe-_-gqNNc3N[4<5hha]+/2IQLs^s:@2>@(=WD(jt[.:05sGGM]_+3F"J>.2uqmFSF'US+RS>_7[*\)r0gN32*'9f@^U:\7lolN>_S@JSU$)pk*f9gie+pK-eqeW$WrP4.uendstream +endobj +xref +0 22 +0000000000 65535 f +0000000061 00000 n +0000000112 00000 n +0000000219 00000 n +0000000331 00000 n +0000000536 00000 n +0000000741 00000 n +0000000856 00000 n +0000001061 00000 n +0000001266 00000 n +0000001471 00000 n +0000001677 00000 n +0000001883 00000 n +0000001953 00000 n +0000002284 00000 n +0000002382 00000 n +0000004571 00000 n +0000006279 00000 n +0000009130 00000 n +0000011669 00000 n +0000013452 00000 n +0000016214 00000 n +trailer +<< +/ID +[] +% ReportLab generated PDF document -- digest (opensource) + +/Info 13 0 R +/Root 12 0 R +/Size 22 +>> +startxref +17316 +%%EOF diff --git a/ops/hermes-agent/scripts/check_health.py b/ops/hermes-agent/scripts/check_health.py new file mode 100644 index 0000000..8fdd1cd --- /dev/null +++ b/ops/hermes-agent/scripts/check_health.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 +""" +check_health.py — Homelab Alert Enricher +========================================= +Laedt services.yaml, prueft Docker-Health aller bekannten Abhaengigkeiten, +liest Dump-Timestamps und gibt einen strukturierten JSON-Report aus. + +Hermes liest diesen Report und baut daraus eine angereicherte ntfy-Nachricht. 
+ +Verwendung: + python3 check_health.py # alle unhealthy Container + python3 check_health.py paperless-ngx # gezielt einen Service pruefen + python3 check_health.py --summary # Gesamtstatus als Zusammenfassung + +Pfad auf Host (via Komodo-Clone): + /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py + +services.yaml wird relativ zum Script-Verzeichnis gesucht: + ../services.yaml +""" + +import json +import os +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +# --------------------------------------------------------------------------- +# Konfiguration +# --------------------------------------------------------------------------- + +SCRIPT_DIR = Path(__file__).parent.resolve() +SERVICES_YAML_PATH = SCRIPT_DIR.parent / "services.yaml" + +# Fallback falls das Repo unter einem anderen Pfad liegt +SERVICES_YAML_FALLBACK = Path("/mnt/user/services/homelab/ops/hermes-agent/services.yaml") + +# Dump-Warnschwelle in Stunden (aelter = Warnung) +DUMP_WARN_HOURS = 26 + + +# --------------------------------------------------------------------------- +# Hilfsfunktionen +# --------------------------------------------------------------------------- + +def load_services(): + """Laedt services.yaml. Gibt (services_dict, meta_dict) zurueck.""" + try: + import yaml + except ImportError: + # PyYAML nicht installiert — minimaler Fallback ueber pip + subprocess.run( + [sys.executable, "-m", "pip", "install", "pyyaml", "-q"], + check=True + ) + import yaml + + path = SERVICES_YAML_PATH if SERVICES_YAML_PATH.exists() else SERVICES_YAML_FALLBACK + if not path.exists(): + raise FileNotFoundError(f"services.yaml nicht gefunden: {path}") + + with open(path) as f: + data = yaml.safe_load(f) + + return data.get("services", {}), data.get("meta", {}) + + +def docker_inspect(container_name: str) -> dict: + """ + Gibt {'status': str, 'health': str} zurueck. 
+ status: running | exited | restarting | dead | not_found | error + health: healthy | unhealthy | starting | none | unknown + """ + try: + result = subprocess.run( + [ + "docker", "inspect", + "--format", + "{{.State.Status}}|||{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}", + container_name, + ], + capture_output=True, + text=True, + timeout=10, + ) + if result.returncode != 0: + return {"status": "not_found", "health": "unknown"} + + parts = result.stdout.strip().split("|||") + return { + "status": parts[0].strip() if parts else "unknown", + "health": parts[1].strip() if len(parts) > 1 else "none", + } + except Exception as e: + return {"status": "error", "health": str(e)} + + +def is_healthy(inspect_result: dict) -> bool: + status = inspect_result.get("status", "") + health = inspect_result.get("health", "") + if status != "running": + return False + if health in ("unhealthy",): + return False + return True + + +def get_unhealthy_containers() -> list[str]: + """Gibt Liste aller Container zurueck die unhealthy oder nicht running sind.""" + try: + # unhealthy per healthcheck + r1 = subprocess.run( + ["docker", "ps", "--filter", "health=unhealthy", "--format", "{{.Names}}"], + capture_output=True, text=True, timeout=10, + ) + # exited/dead Container die eigentlich laufen sollten + r2 = subprocess.run( + ["docker", "ps", "--filter", "status=exited", "--format", "{{.Names}}"], + capture_output=True, text=True, timeout=10, + ) + names = set() + for raw in (r1.stdout, r2.stdout): + for name in raw.strip().split("\n"): + name = name.strip() + if name: + names.add(name) + return sorted(names) + except Exception: + return [] + + +def get_dump_info(dump_file: str | None, dump_base: str) -> dict | None: + """Gibt Alter und Groesse des Dump-Files zurueck (oder None wenn nicht vorhanden).""" + if not dump_file: + return None + + path = Path(dump_base) / dump_file + if not path.exists(): + return {"file": dump_file, "exists": False, "age_hours": None, 
"size_mb": None} + + stat = path.stat() + age_hours = round((datetime.now().timestamp() - stat.st_mtime) / 3600, 1) + size_mb = round(stat.st_size / 1_048_576, 1) + + return { + "file": dump_file, + "exists": True, + "age_hours": age_hours, + "size_mb": size_mb, + "warn": age_hours > DUMP_WARN_HOURS, + } + + +# --------------------------------------------------------------------------- +# Report-Generierung +# --------------------------------------------------------------------------- + +def build_service_report(service_key: str, service: dict, all_services: dict, meta: dict) -> dict: + """Erstellt einen vollstaendigen Report fuer einen einzelnen Service.""" + dump_base = meta.get("dump_base", "/mnt/user/backups/borg/dumps/latest") + + # Eigener Container-Status + own_inspect = docker_inspect(service["container_name"]) + own_healthy = is_healthy(own_inspect) + + # Abhaengigkeits-Check + dep_results = {} + for dep_key in service.get("dependencies", []): + dep = all_services.get(dep_key) + if not dep: + dep_results[dep_key] = {"status": "unknown_service", "health": "unknown", "healthy": False} + continue + insp = docker_inspect(dep["container_name"]) + dep_results[dep_key] = { + **insp, + "healthy": is_healthy(insp), + "tier": dep.get("tier"), + "container_name": dep["container_name"], + } + + unhealthy_deps = [k for k, v in dep_results.items() if not v["healthy"]] + + # Dump-Info + dump_info = get_dump_info(service.get("dump_file"), dump_base) + + return { + "service": service_key, + "description": service.get("description", ""), + "tier": service.get("tier"), + "url": service.get("url"), + "container": { + "name": service["container_name"], + **own_inspect, + "healthy": own_healthy, + }, + "dependencies": dep_results, + "unhealthy_deps": unhealthy_deps, + "dump": dump_info, + "first_check": service.get("first_check", ""), + "notes": service.get("notes", ""), + "timestamp": datetime.now().isoformat(), + } + + +def build_summary_report(all_services: dict, meta: dict) 
-> dict: + """Prueft alle Tier-1 und Tier-2 Dienste und gibt einen Gesamtstatus zurueck.""" + results = {} + issues = [] + + for key, svc in all_services.items(): + tier = svc.get("tier", 3) + if tier > 2: + continue # Tier-3 im Summary ueberspringen + + insp = docker_inspect(svc["container_name"]) + healthy = is_healthy(insp) + results[key] = { + "tier": tier, + "healthy": healthy, + "status": insp["status"], + "health": insp["health"], + } + if not healthy: + issues.append({"service": key, "tier": tier, **insp}) + + # Dump-Checks fuer alle Dienste mit dump_file + dump_base = meta.get("dump_base", "/mnt/user/backups/borg/dumps/latest") + stale_dumps = [] + for key, svc in all_services.items(): + info = get_dump_info(svc.get("dump_file"), dump_base) + if info and info.get("warn"): + stale_dumps.append({ + "service": key, + "file": info["file"], + "age_hours": info["age_hours"], + }) + + return { + "mode": "summary", + "timestamp": datetime.now().isoformat(), + "services_checked": len(results), + "issues": issues, + "stale_dumps": stale_dumps, + "overall_healthy": len(issues) == 0 and len(stale_dumps) == 0, + } + + +# --------------------------------------------------------------------------- +# Einstiegspunkt +# --------------------------------------------------------------------------- + +def main(): + args = sys.argv[1:] + all_services, meta = load_services() + + if "--summary" in args: + report = build_summary_report(all_services, meta) + print(json.dumps(report, indent=2, ensure_ascii=False)) + return + + # Expliziter Service-Key als Argument + if args and not args[0].startswith("--"): + service_key = args[0] + service = all_services.get(service_key) + if not service: + print(json.dumps({"error": f"Service '{service_key}' nicht in services.yaml gefunden."})) + sys.exit(1) + report = build_service_report(service_key, service, all_services, meta) + print(json.dumps(report, indent=2, ensure_ascii=False)) + return + + # Kein Argument: alle unhealthy Container 
automatisch finden + unhealthy_names = get_unhealthy_containers() + + if not unhealthy_names: + print(json.dumps({"status": "all_healthy", "timestamp": datetime.now().isoformat()})) + return + + reports = [] + for container_name in unhealthy_names: + # Container-Name auf Service-Key mappen + service_key = None + service = None + for key, svc in all_services.items(): + if svc["container_name"] == container_name: + service_key = key + service = svc + break + + if not service: + reports.append({ + "service": container_name, + "description": "Unbekannter Container (nicht in services.yaml)", + "tier": None, + "container": {"name": container_name, "status": "unhealthy", "health": "unknown", "healthy": False}, + "dependencies": {}, + "unhealthy_deps": [], + "dump": None, + "first_check": "Container nicht in services.yaml — manuell pruefen", + "notes": "services.yaml aktualisieren wenn dieser Container produktiv ist", + "timestamp": datetime.now().isoformat(), + }) + continue + + reports.append(build_service_report(service_key, service, all_services, meta)) + + print(json.dumps(reports, indent=2, ensure_ascii=False)) + + +if __name__ == "__main__": + main() diff --git a/ops/hermes-agent/services.yaml b/ops/hermes-agent/services.yaml new file mode 100644 index 0000000..b36cef5 --- /dev/null +++ b/ops/hermes-agent/services.yaml @@ -0,0 +1,592 @@ +# services.yaml — Maschinenlesbare Wissensbasis fuer Hermes Alert Enrichment +# +# Abgeleitet aus docs/SERVICE_CATALOG.md +# Stand: 2026-05-06 +# +# Zweck: Hermes laedt diese Datei beim Alert-Anreichern, um Abhaengigkeiten, +# Dump-Zeitstempel und den ersten Diagnoseschritt nachzuschlagen. 
+# +# Felder: +# description - Kurzbeschreibung des Dienstes +# tier - Kritikalitaet: 1=Control Plane, 2=User Apps, 3=Ops/Tools +# category - core | security | infra | app | ops +# container_name - exakter Docker-Containername (fuer docker inspect) +# dependencies - Liste direkter Laufzeit-Abhaengigkeiten (andere Service-Keys) +# url - oeffentliche URL (null = intern/LAN only) +# dump_file - Dateiname in /mnt/user/backups/borg/dumps/latest/ (null = kein Dump) +# data_paths - kritische Datenpfade auf dem Host +# first_check - erster Diagnoseschritt bei Ausfall (Freitext fuer Hermes) +# notes - betriebliche Hinweise und dokumentierte Ausnahmen + +meta: + dump_base: /mnt/user/backups/borg/dumps/latest + appdata_base: /mnt/user/appdata + secrets_path: /mnt/user/appdata/secrets + +# --------------------------------------------------------------------------- +# TIER 1 — Control Plane (Ausfall blockiert alles darunter) +# --------------------------------------------------------------------------- + +services: + + traefik: + description: Zentraler Reverse Proxy, TLS, Docker-Label-Routing + tier: 1 + category: core + container_name: traefik + dependencies: [] + url: https://traefik.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/traefik/dynamic + - /mnt/user/appdata/traefik/letsencrypt + first_check: "Host-Ports 80/443 erreichbar? dynamic/ korrekt auf Host synchronisiert?" + notes: "dynamic configs werden NICHT automatisch von Komodo deployed — manueller Host-Sync noetig" + + adguard: + description: DNS-Server / LAN DNS + tier: 1 + category: core + container_name: adguard + dependencies: + - unbound + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/adguard/conf + - /mnt/user/appdata/adguard/work + first_check: "Port 53 erreichbar? Unbound healthy? dns_net Konnektivitaet?" 
+ notes: "Ports 53 und 8082 dokumentierte Host-Port-Ausnahmen" + + unbound: + description: Upstream DNS Resolver fuer AdGuard + tier: 1 + category: core + container_name: unbound + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/unbound/config + first_check: "dns_net Konnektivitaet pruefen; Container-Logs auf Fehler pruefen" + notes: "rebuildbar; isoliert in dns_net" + + tailscale: + description: VPN / Remote-Zugang + tier: 1 + category: core + container_name: tailscale + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/tailscale + first_check: "Tailscale Status auf Host pruefen; State-Datei fuer Key-Renewal vorhanden?" + notes: "network_mode: host; NET_ADMIN, NET_RAW, /dev/net/tun — dokumentierte VPN-Ausnahmen" + + gitea: + description: Git-Server — operative Quelle der Wahrheit fuer GitOps + tier: 1 + category: core + container_name: gitea + dependencies: + - traefik + url: https://git.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/services/gitea/data + first_check: "HTTPS erreichbar? SQLite in /data intakt? SSH-Port 222 erreichbar?" + notes: "SQLite in /data — kein separater Dump; ohne externen Mirror im DR kritisch" + + authelia: + description: ForwardAuth — zentrale Authentifizierung fuer Admin-UIs + tier: 1 + category: security + container_name: authelia + dependencies: + - postgresql17 + - traefik + url: https://auth.kaleschke.info + dump_file: postgresql17-authelia.dump + data_paths: + - /mnt/user/appdata/authelia/config + first_check: "PostgreSQL healthy? SMTP via GMX erreichbar? Host-Config aktuell (Repo-Baseline != Host)?" 
+ notes: "kein Redis-Session-Backend; SMTP-Notifier GMX; Repo-Baseline muss manuell in Host-Config gemerged werden" + + vaultwarden: + description: Passwort-Tresor + tier: 1 + category: security + container_name: vaultwarden + dependencies: + - traefik + url: https://vault.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/vaultwarden + first_check: "HTTPS erreichbar? Appdata-Volume intakt?" + notes: "ADMIN_TOKEN_FILE; keine direkten Host-Ports" + + postgresql17: + description: Shared PostgreSQL Cluster (Authelia, Paperless, Mail-Archiver, Mealie, Komodo indirekt) + tier: 1 + category: infra + container_name: postgresql17 + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/postgresql17 + first_check: "backend_net Konnektivitaet? Disk-Space auf /mnt/user/appdata? pg_isready im Container?" + notes: "Dumps per Dienst unter dumps/latest; raw DB nicht primaerer Restore-Weg" + + komodo-core: + description: GitOps UI / API / Stack-Manager + tier: 1 + category: ops + container_name: komodo-core + dependencies: + - komodo-mongo + - gitea + - traefik + url: https://komodo.kaleschke.info + dump_file: komodo-mongo.archive.gz + data_paths: + - /mnt/user/appdata/komodo/core + first_check: "MongoDB healthy? Gitea erreichbar? komodo_net Konnektivitaet?" + notes: "keine pauschale Authelia-ForwardAuth; Gitea DNS override konfiguriert" + + komodo-mongo: + description: Komodo Datenbank (MongoDB) + tier: 1 + category: infra + container_name: komodo-mongo + dependencies: [] + url: null + dump_file: komodo-mongo.archive.gz + data_paths: + - /mnt/user/appdata/komodo/mongo + first_check: "komodo_net Konnektivitaet? Disk-Space? mongosh ping?" 
+ notes: "Dump-Integritaet nach Major-Upgrades pruefen" + + komodo-periphery: + description: Komodo Host-Agent (Stack-Deployments) + tier: 1 + category: ops + container_name: komodo-periphery + dependencies: + - komodo-core + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/komodo/periphery + first_check: "Docker-Socket lesbar? /mnt/user/services gemountet? komodo_net Verbindung zu Core?" + notes: "Docker-Socket-Ausnahme dokumentiert; /mnt/user/services Mount fuer Stack-Workspaces" + +# --------------------------------------------------------------------------- +# TIER 2 — User Apps +# --------------------------------------------------------------------------- + + redis: + description: Shared Redis Cache (Paperless, weitere) + tier: 2 + category: infra + container_name: redis + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/redis + first_check: "backend_net Konnektivitaet? redis-cli ping erreichbar?" + notes: "transiente Daten; bewusst nicht Backup-kritisch" + + paperless-ngx: + description: Dokumentenmanagement + tier: 2 + category: app + container_name: paperless-ngx + dependencies: + - postgresql17 + - redis + - traefik + url: https://paperless.kaleschke.info + dump_file: postgresql17-paperless.dump + data_paths: + - /mnt/user/appdata/paperless-ngx/data + - /mnt/user/documents/paperless + - /mnt/user/documents/scans_inbox + first_check: "Redis healthy? PostgreSQL healthy? backend_net Konnektivitaet?" + notes: "DB/Redis Secrets als Stack ENV (keine _FILE Variante)" + + paperless-gpt: + description: KI-Ergaenzung fuer Paperless (OCR/Tagging via LLM) + tier: 2 + category: app + container_name: paperless-gpt + dependencies: + - paperless-ngx + - traefik + url: https://paperless-gpt.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/paperless-gpt/data + - /mnt/user/appdata/paperless-gpt/prompts + first_check: "Paperless API erreichbar? LLM/Ollama erreichbar? API Token gesetzt?" 
+ notes: "API Token als Stack ENV; abhaengig von laufendem Paperless" + + immich_server: + description: Foto-/Video-App + tier: 2 + category: app + container_name: immich_server + dependencies: + - immich_postgres + - immich_redis + - immich_machine_learning + - traefik + url: https://immich.kaleschke.info + dump_file: immich.dump + data_paths: + - /mnt/user/photos/immich + - /mnt/user/photos/family_archive + first_check: "immich_postgres healthy? immich_redis healthy? ML-Container healthy? immich_default Netz?" + notes: "native App-Auth; externes Fotoarchiv gemountet" + + immich_postgres: + description: Immich-Datenbank + tier: 2 + category: infra + container_name: immich_postgres + dependencies: [] + url: null + dump_file: immich.dump + data_paths: + - /mnt/user/appdata/immich_postgres + first_check: "immich_default Netz? Disk-Space? pg_isready?" + notes: "nie ins frontend_net; immich_default Netz isoliert" + + immich_redis: + description: Immich Cache + tier: 2 + category: infra + container_name: immich_redis + dependencies: [] + url: null + dump_file: null + data_paths: [] + first_check: "immich_default Netz? redis-cli ping?" + notes: "rebuildbar; anonymes Volume — named volume als offenes TODO" + + immich_machine_learning: + description: Immich ML (Gesichtserkennung, Suche) + tier: 2 + category: infra + container_name: immich_machine_learning + dependencies: [] + url: null + dump_file: null + data_paths: + - model-cache + first_check: "immich_default Netz? model-cache Volume vorhanden?" + notes: "rebuildbar; intern-only" + + mealie: + description: Rezeptverwaltung + tier: 2 + category: app + container_name: mealie + dependencies: + - mealie-postgres + - traefik + url: https://mealie.kaleschke.info + dump_file: mealie.dump + data_paths: + - /mnt/user/appdata/mealie/data + first_check: "mealie-postgres healthy? mealie_internal Netz erreichbar?" 
+ notes: "App + DB in internem Netz getrennt (mealie_internal)" + + mealie-postgres: + description: Mealie-Datenbank + tier: 2 + category: infra + container_name: mealie-postgres + dependencies: [] + url: null + dump_file: mealie.dump + data_paths: + - /mnt/user/appdata/mealie/postgres + first_check: "mealie_internal Netz? Disk-Space?" + notes: "interne DB; mealie_internal Netz" + + mail-archiver: + description: Mail-Archivierung (IMAP) + tier: 2 + category: app + container_name: mail-archiver + dependencies: + - postgresql17 + - authelia + - traefik + url: https://mail.kaleschke.info + dump_file: postgresql17-mailarchiver.dump + data_paths: + - /mnt/user/appdata/mailarchiver/data-protection-keys + first_check: "PostgreSQL healthy? Internet-/IMAP-Zugang? Authelia healthy?" + notes: "Hybrid: frontend_net fuer IMAP/Internet, backend_net fuer DB" + + nextcloud: + description: Datei-/Cloud-Dienst + tier: 2 + category: app + container_name: nextcloud + dependencies: + - nextcloud-postgres + - nextcloud-redis + - traefik + url: https://cloud.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/nextcloud/html + - /mnt/user/documents/nextcloud-data + first_check: "nextcloud-postgres healthy? nextcloud-redis healthy? nextcloud_internal Netz?" + notes: "native App-Auth (kein zentrales ForwardAuth); WebDAV/CardDAV beachten" + + nextcloud-postgres: + description: Nextcloud-Datenbank + tier: 2 + category: infra + container_name: nextcloud-postgres + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/nextcloud/postgres + first_check: "nextcloud_internal Netz? Disk-Space?" + notes: "interne DB" + + nextcloud-redis: + description: Nextcloud Cache / Locking + tier: 2 + category: infra + container_name: nextcloud-redis + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/nextcloud/redis + first_check: "nextcloud_internal Netz? redis-cli ping?" 
+ notes: "rebuildbar" + + ntfy: + description: Push-Benachrichtigungen (Alert-Backbone) + tier: 2 + category: app + container_name: ntfy + dependencies: + - traefik + url: https://ntfy.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/ntfy + first_check: "HTTPS erreichbar? NTFY_BEHIND_PROXY=true gesetzt? Traefik healthy?" + notes: "KRITISCH: Ausfall bedeutet keine anderen Alerts ankommen; Monitoring/Borg-Benachrichtigungen" + +# --------------------------------------------------------------------------- +# TIER 3 — Ops / Tools (Ausfall schmerzt, blockiert nichts Kritisches) +# --------------------------------------------------------------------------- + + homepage: + description: Start-Dashboard + tier: 3 + category: ops + container_name: homepage + dependencies: + - traefik + url: https://home.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/homepage + first_check: "Traefik erreichbar? Docker-Socket read-only lesbar? API-Tokens gueltig?" + notes: "Docker socket read-only; viele API Tokens in Config" + + uptime-kuma: + description: Monitoring / Uptime Checks + tier: 3 + category: ops + container_name: UptimeKuma + dependencies: + - traefik + url: https://uptime.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/uptime-kuma + first_check: "Datenbank-Volume intakt? Traefik erreichbar?" + notes: "Monitore nach Restore manuell pruefen" + + grafana: + description: Metrik-Dashboard + tier: 3 + category: ops + container_name: grafana + dependencies: + - influxdb3-core + - traefik + url: https://grafana.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/grafana + first_check: "influxdb3-core healthy? Datasource-Token in Secret gesetzt? Provisioning-Konfig vorhanden?" 
+ notes: "laeuft als user 0 wegen Host-Appdata-Permissions (dokumentiert); Datasource wird provisioniert" + + influxdb3-core: + description: Zeitreihen- / Metrikdaten fuer Grafana und Home Assistant + tier: 3 + category: ops + container_name: influxdb3-core + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/influxdb3/data + - /mnt/user/appdata/influxdb3/plugins + first_check: "LAN-Port 8181 erreichbar? 401 ohne Token = OK (erwartet). Disk-Space?" + notes: "LAN-only Host-Port 8181; kein frontend_net; laeuft als user 0" + + scrutiny: + description: Laufwerks- / SMART-Monitoring + tier: 3 + category: ops + container_name: scrutiny + dependencies: + - traefik + url: https://scrutiny.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/scrutiny/config + - /mnt/user/appdata/scrutiny/influxdb + first_check: "Device-Mounts vorhanden? privileged=true gesetzt? Traefik erreichbar?" + notes: "privileged: true dokumentierte Ausnahme" + + glances: + description: System- / Container-Monitoring + tier: 3 + category: ops + container_name: glances + dependencies: + - traefik + url: https://glances.kaleschke.info + dump_file: null + data_paths: [] + first_check: "Docker-Socket lesbar? rootfs gemountet? Traefik erreichbar?" + notes: "rebuildbar; Docker-Socket und rootfs Mounts" + + borg-ui: + description: Borg Backup- / Restore UI + tier: 3 + category: ops + container_name: borg-ui + dependencies: + - traefik + url: https://borg.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/borg-ui/data + - /mnt/user/backups/borg/dumps + first_check: "Borg-Repo-Credentials vorhanden? Backup-Mounts erreichbar? Traefik healthy?" 
+ notes: "breite Mounts bewusst dokumentiert; /local/secrets im DR-Scope" + + backrest: + description: Backup-Admin-Dienst (Legacy-Backup-Ebene) + tier: 3 + category: ops + container_name: backrest + dependencies: + - traefik + url: https://backrest.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/backrest + first_check: "Repo/SSH-Mounts erreichbar? Traefik healthy?" + notes: "breite Mounts bewusst dokumentiert" + + hermes-gateway: + description: Hermes Agent Gateway / AI Ops Assistant + tier: 3 + category: ops + container_name: hermes-gateway + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/hermes-agent/data + first_check: "hermes_net:8642/health erreichbar? SSH-Key gemountet? LLM-Provider erreichbar?" + notes: "kein Docker-Socket; SSH terminal backend; echte .env auf Host-Appdata" + + ddns-updater: + description: Cloudflare / DDNS Aktualisierung + tier: 3 + category: infra + container_name: ddns-updater + dependencies: [] + url: null + dump_file: null + data_paths: + - /mnt/user/appdata/ddns-updater + first_check: "Internetzugang? Cloudflare API erreichbar? Config vorhanden?" + notes: "bewusst in frontend_net weil backend_net internal ist" + + code-server: + description: Web-Editor / Operations Workspace + tier: 3 + category: ops + container_name: code-server + dependencies: + - traefik + url: https://code.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/code-server + - /mnt/user/services/dev + first_check: "Traefik erreichbar? PASSWORD_FILE lesbar?" + notes: "PASSWORD_FILE; Workspaces bei Restore beachten" + + filebrowser: + description: Datei-Browser fuer Appdata + tier: 3 + category: ops + container_name: filebrowser + dependencies: + - traefik + url: https://files.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/filebrowser + first_check: "Appdata-Mounts erreichbar? Traefik healthy?" 
+ notes: "breiter /mnt/user/appdata Mount; Einschraenkung langfristig als TODO" + + speedtest-tracker: + description: Speedtest-Monitoring + tier: 3 + category: ops + container_name: speedtest-tracker + dependencies: + - traefik + url: https://speedtest.kaleschke.info + dump_file: null + data_paths: + - /mnt/user/appdata/speedtest-tracker/config + first_check: "APP_KEY gesetzt? Internetzugang fuer Speedtest vorhanden?" + notes: "APP_KEY, ADMIN_PASSWORD als Stack ENV" + + bentopdf: + description: PDF-Tooling + tier: 3 + category: app + container_name: bentopdf + dependencies: + - traefik + url: https://pdf.kaleschke.info + dump_file: null + data_paths: [] + first_check: "COOP/COEP Middleware gesetzt? Traefik healthy?" + notes: "rebuildbar; keine kritische Persistenz; Live-Status pruefen" diff --git a/ops/hermes-agent/skills/homelab-ops-monitor.md b/ops/hermes-agent/skills/homelab-ops-monitor.md new file mode 100644 index 0000000..e4715aa --- /dev/null +++ b/ops/hermes-agent/skills/homelab-ops-monitor.md @@ -0,0 +1,153 @@ +# Skill: homelab-ops-monitor + +## Zweck + +Dieser Skill macht Hermes zum kontextuellen Ops-Assistenten fuer das Kallilabcore-Homelab. +Wenn ein Container unhealthy wird, liefert dieser Skill keine rohe Fehlermeldung, +sondern einen angereicherten Alert: Was ist kaputt, welche Abhaengigkeiten sind betroffen, +wie alt ist der letzte Backup-Dump, und was ist der erste konkrete Diagnoseschritt. + +--- + +## Wann aktivieren + +- Wenn ein Container unhealthy gemeldet wird (manuell oder via Cronjob) +- Wenn der Benutzer fragt: "Was ist kaputt?" / "Was ist mit [Service]?" +- Wenn ein proaktiver Health-Check ausgefuehrt werden soll +- Wenn ein ntfy-Alert angereichert werden soll bevor er gesendet wird + +--- + +## Kernprinzipien + +1. **Immer check_health.py ausfuehren** — nie raten, immer messen. +2. **Kontext aus services.yaml** — Abhaengigkeiten und Dump-Info sind dort definiert. +3. 
**ntfy-Alert nur wenn wirklich etwas unhealthy ist** — kein Alert-Spam. +4. **Tier 1 = urgent, Tier 2 = high, Tier 3 = default** — ntfy Priority entsprechend setzen. +5. **Kein Schreiben, kein Neustart** — dieser Skill diagnostiziert, handelt nicht. + +--- + +## Ausfuehrungsschritte + +### Schritt 1 — Health-Check ausfuehren + +Fuehre via Terminal (SSH) auf dem Host aus: + +```bash +python3 /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py +``` + +Fuer einen gezielten Service: +```bash +python3 /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py [service-key] +``` + +Fuer den Gesamtstatus (Tier 1+2): +```bash +python3 /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py --summary +``` + +### Schritt 2 — JSON-Output interpretieren + +Der Report enthaelt je Service: +- `tier` — Kritikalitaet (1=Control Plane, 2=App, 3=Ops) +- `container.healthy` — aktueller Gesundheitsstatus +- `unhealthy_deps` — Liste der ebenfalls unhealthy Abhaengigkeiten +- `dump.age_hours` — Alter des letzten Dumps in Stunden (>26h = Warnung) +- `dump.warn` — true wenn Dump veraltet +- `first_check` — erster Diagnoseschritt laut service catalog +- `notes` — betriebliche Hinweise + +### Schritt 3 — ntfy-Alert bauen + +Baue eine ntfy-Nachricht nach diesem Format: + +``` +[Titel] +[Tier-Emoji] [service-key] unhealthy (Tier [N]) + +Beschreibung: [description] + +Abhaengigkeiten: + [✅/❌] [dep-key] — [status] + +Letzter Dump: [age_hours]h alt [✅/⚠️] (oder: kein Dump konfiguriert) + +Erster Check: + [first_check] + +Hinweis: [notes] +``` + +Tier-Emojis: Tier 1 = 🔴, Tier 2 = 🟠, Tier 3 = 🟡 +Dump-Warnschwelle: >26 Stunden = ⚠️ + +### Schritt 4 — ntfy senden + +```bash +curl -s \ + -H "Title: [Tier N] [service-key] unhealthy" \ + -H "Priority: [urgent|high|default]" \ + -H "Tags: [warning,tier1|tier2|tier3]" \ + -d "[message]" \ + https://ntfy.kaleschke.info/homelab-alerts +``` + +ntfy Prioritaeten: +- Tier 1 → `urgent` +- Tier 2 → `high` +- Tier 3 → `default` + +--- + 
+## Sonderfaelle + +### Unbekannter Container (nicht in services.yaml) +-> Alert senden mit Hinweis "nicht in services.yaml — bitte aktualisieren" +-> services.yaml Pfad: `/mnt/user/services/homelab/ops/hermes-agent/services.yaml` + +### ntfy selbst ist unhealthy +-> Alert kann nicht per ntfy gesendet werden +-> Hermes sendet stattdessen via Telegram (falls konfiguriert) +-> Nachricht: "KRITISCH: ntfy ist unhealthy — kein Push-Alerting aktiv" + +### Alle Tier-1-Abhaengigkeiten unhealthy +-> Wahrscheinlich kein isoliertes Problem — Host oder Netzwerk pruefen +-> Zusammenfassenden Alert senden statt Einzel-Alerts + +### check_health.py nicht gefunden +-> Meldung: "Script nicht gefunden unter /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py" +-> Pruefe ob Komodo den Stack zuletzt deployed hat + +--- + +## Cronjob-Empfehlung + +Fuer automatische Checks ohne Uptime-Kuma-Webhook: + +``` +# Jede Stunde — prueft alle unhealthy Container +0 * * * * python3 /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py + +# Taeglich 07:00 — Gesamtstatus Tier 1+2 +0 7 * * * python3 /mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py --summary +``` + +--- + +## Nicht-Ziele dieses Skills + +- **Kein automatischer Neustart** von Containern +- **Kein Schreiben** in Compose-Dateien oder Konfigurationen +- **Kein Deploy** via Komodo +- **Keine Diagnose-Tiefe** jenseits des `first_check`-Hinweises (das ist Aufgabe des Benutzers) + +--- + +## Verwandte Skills und Ressourcen + +- `kallilab-homelab-ops` — Governance-Skill fuer Aenderungsentscheidungen +- `services.yaml` — Wissensbasis: `/mnt/user/services/homelab/ops/hermes-agent/services.yaml` +- `check_health.py` — Ausfuehrungs-Script: `/mnt/user/services/homelab/ops/hermes-agent/scripts/check_health.py` +- Repo: `https://git.kaleschke.info` (origin/master)