32-slide self-contained deck mirroring SETUP-AND-DEPLOY.md structure. Keyboard nav (arrows/space/PageUp-Down/digits/f for fullscreen), swipe, click-to-advance, deep-linkable slides via #s=N, print-friendly. Zero external deps — ships as one HTML file.
1028 lines
35 KiB
HTML
1028 lines
35 KiB
HTML
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
<title>Proxmox Monitor — Setup & Deploy</title>
|
||
<style>
|
||
:root {
|
||
--bg: #0b1220;
|
||
--bg-2: #121b2f;
|
||
--fg: #e6edf3;
|
||
--muted: #94a3b8;
|
||
--accent: #22c55e;
|
||
--accent-2: #38bdf8;
|
||
--warn: #f59e0b;
|
||
--danger: #ef4444;
|
||
--border: #1e293b;
|
||
--code-bg: #0f172a;
|
||
--code-fg: #cbd5e1;
|
||
}
|
||
|
||
* { box-sizing: border-box; }
|
||
html, body { margin: 0; padding: 0; height: 100%; width: 100%; }
|
||
body {
|
||
background: var(--bg);
|
||
color: var(--fg);
|
||
font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", Roboto, sans-serif;
|
||
font-size: 18px;
|
||
line-height: 1.5;
|
||
overflow: hidden;
|
||
-webkit-font-smoothing: antialiased;
|
||
}
|
||
|
||
.deck {
|
||
position: relative;
|
||
width: 100vw;
|
||
height: 100vh;
|
||
overflow: hidden;
|
||
}
|
||
|
||
.slide {
|
||
position: absolute;
|
||
inset: 0;
|
||
display: none;
|
||
padding: 4vw 6vw 6vw 6vw;
|
||
overflow: auto;
|
||
}
|
||
.slide.active { display: flex; flex-direction: column; }
|
||
|
||
.slide h1 {
|
||
font-size: clamp(1.6rem, 3.2vw, 2.8rem);
|
||
font-weight: 700;
|
||
letter-spacing: -0.02em;
|
||
margin: 0 0 1rem 0;
|
||
line-height: 1.15;
|
||
}
|
||
.slide h2 {
|
||
font-size: clamp(1.1rem, 1.6vw, 1.4rem);
|
||
font-weight: 500;
|
||
color: var(--muted);
|
||
margin: 0 0 2rem 0;
|
||
letter-spacing: 0.02em;
|
||
}
|
||
.slide h3 {
|
||
font-size: 1.15rem;
|
||
font-weight: 600;
|
||
margin: 1.2rem 0 0.5rem 0;
|
||
color: var(--accent-2);
|
||
}
|
||
|
||
.eyebrow {
|
||
display: inline-block;
|
||
font-size: 0.78rem;
|
||
font-weight: 600;
|
||
letter-spacing: 0.16em;
|
||
text-transform: uppercase;
|
||
color: var(--accent);
|
||
margin-bottom: 0.6rem;
|
||
border-left: 3px solid var(--accent);
|
||
padding-left: 0.7rem;
|
||
}
|
||
|
||
.slide ul, .slide ol {
|
||
padding-left: 1.3rem;
|
||
margin: 0.5rem 0 1rem 0;
|
||
}
|
||
.slide li { margin: 0.35rem 0; }
|
||
|
||
.slide .muted { color: var(--muted); }
|
||
.slide strong { color: #ffffff; }
|
||
.slide code {
|
||
font-family: "SF Mono", Menlo, Consolas, "Liberation Mono", monospace;
|
||
background: var(--code-bg);
|
||
padding: 0.1rem 0.4rem;
|
||
border-radius: 4px;
|
||
font-size: 0.92em;
|
||
color: var(--code-fg);
|
||
}
|
||
.slide pre {
|
||
background: var(--code-bg);
|
||
border: 1px solid var(--border);
|
||
border-radius: 8px;
|
||
padding: 1rem 1.2rem;
|
||
overflow-x: auto;
|
||
font-family: "SF Mono", Menlo, Consolas, "Liberation Mono", monospace;
|
||
font-size: clamp(0.72rem, 0.95vw, 0.95rem);
|
||
line-height: 1.5;
|
||
color: var(--code-fg);
|
||
margin: 0.6rem 0 1rem 0;
|
||
}
|
||
.slide pre .c { color: #64748b; }
|
||
.slide pre .k { color: var(--accent-2); }
|
||
.slide pre .s { color: #fca5a5; }
|
||
.slide pre .n { color: var(--warn); }
|
||
|
||
.grid {
|
||
display: grid;
|
||
grid-template-columns: 1fr 1fr;
|
||
gap: 1.5rem;
|
||
}
|
||
.grid-3 {
|
||
display: grid;
|
||
grid-template-columns: repeat(3, 1fr);
|
||
gap: 1rem;
|
||
}
|
||
|
||
.card {
|
||
background: var(--bg-2);
|
||
border: 1px solid var(--border);
|
||
border-radius: 10px;
|
||
padding: 1.2rem 1.3rem;
|
||
}
|
||
.card .label {
|
||
font-size: 0.72rem;
|
||
text-transform: uppercase;
|
||
letter-spacing: 0.12em;
|
||
color: var(--muted);
|
||
margin-bottom: 0.4rem;
|
||
}
|
||
.card .value { font-weight: 600; color: #ffffff; }
|
||
|
||
.callout {
|
||
border-left: 4px solid var(--warn);
|
||
background: rgba(245, 158, 11, 0.08);
|
||
padding: 0.8rem 1.1rem;
|
||
border-radius: 4px;
|
||
margin: 0.8rem 0;
|
||
color: #fde68a;
|
||
font-size: 0.95rem;
|
||
}
|
||
.callout.danger {
|
||
border-color: var(--danger);
|
||
background: rgba(239, 68, 68, 0.08);
|
||
color: #fecaca;
|
||
}
|
||
.callout.info {
|
||
border-color: var(--accent-2);
|
||
background: rgba(56, 189, 248, 0.08);
|
||
color: #bae6fd;
|
||
}
|
||
|
||
.check {
|
||
display: flex;
|
||
align-items: flex-start;
|
||
gap: 0.8rem;
|
||
padding: 0.45rem 0;
|
||
border-bottom: 1px solid var(--border);
|
||
font-size: 0.95rem;
|
||
}
|
||
.check:last-child { border-bottom: none; }
|
||
.check .box {
|
||
flex: 0 0 auto;
|
||
width: 18px;
|
||
height: 18px;
|
||
border: 2px solid var(--accent);
|
||
border-radius: 3px;
|
||
margin-top: 3px;
|
||
}
|
||
|
||
table { width: 100%; border-collapse: collapse; margin: 0.6rem 0; font-size: 0.9rem; }
|
||
th, td {
|
||
text-align: left;
|
||
padding: 0.45rem 0.6rem;
|
||
border-bottom: 1px solid var(--border);
|
||
vertical-align: top;
|
||
}
|
||
th { color: var(--muted); font-weight: 600; font-size: 0.78rem; letter-spacing: 0.08em; text-transform: uppercase; }
|
||
td code { font-size: 0.85em; }
|
||
|
||
.ascii {
|
||
font-family: "SF Mono", Menlo, Consolas, monospace;
|
||
font-size: clamp(0.72rem, 0.95vw, 1rem);
|
||
color: var(--code-fg);
|
||
white-space: pre;
|
||
background: var(--code-bg);
|
||
border: 1px solid var(--border);
|
||
border-radius: 8px;
|
||
padding: 1rem 1.2rem;
|
||
overflow-x: auto;
|
||
}
|
||
|
||
/* Title slide */
|
||
.title-slide {
|
||
justify-content: center;
|
||
align-items: flex-start;
|
||
}
|
||
.title-slide h1 {
|
||
font-size: clamp(2.5rem, 6vw, 5rem);
|
||
margin-bottom: 0.5rem;
|
||
}
|
||
.title-slide .subtitle {
|
||
font-size: clamp(1.2rem, 2vw, 1.6rem);
|
||
color: var(--muted);
|
||
margin-bottom: 1.5rem;
|
||
}
|
||
.title-slide .meta {
|
||
margin-top: 2rem;
|
||
color: var(--muted);
|
||
font-size: 0.9rem;
|
||
}
|
||
|
||
/* End slide */
|
||
.end-slide { justify-content: center; align-items: center; text-align: center; }
|
||
.end-slide h1 { font-size: clamp(2rem, 5vw, 4rem); }
|
||
|
||
/* Chrome */
|
||
.topbar {
|
||
position: fixed;
|
||
top: 0; left: 0; right: 0;
|
||
display: flex;
|
||
justify-content: space-between;
|
||
align-items: center;
|
||
padding: 0.9rem 1.4rem;
|
||
font-size: 0.78rem;
|
||
color: var(--muted);
|
||
z-index: 10;
|
||
pointer-events: none;
|
||
}
|
||
.topbar .brand { letter-spacing: 0.12em; text-transform: uppercase; }
|
||
.topbar .counter { font-variant-numeric: tabular-nums; }
|
||
|
||
.progress {
|
||
position: fixed;
|
||
bottom: 0; left: 0;
|
||
height: 2px;
|
||
background: var(--accent);
|
||
transition: width 0.25s ease;
|
||
z-index: 10;
|
||
}
|
||
|
||
.footer-hint {
|
||
position: fixed;
|
||
bottom: 0.7rem; right: 1.4rem;
|
||
font-size: 0.72rem;
|
||
color: var(--muted);
|
||
z-index: 10;
|
||
pointer-events: none;
|
||
}
|
||
|
||
@media (max-width: 720px) {
|
||
.grid, .grid-3 { grid-template-columns: 1fr; }
|
||
.slide { padding: 5vw 5vw 10vw 5vw; }
|
||
}
|
||
|
||
@media print {
|
||
.topbar, .progress, .footer-hint { display: none; }
|
||
.deck, .slide { position: static; height: auto; overflow: visible; }
|
||
.slide { display: block !important; page-break-after: always; min-height: 100vh; }
|
||
}
|
||
</style>
|
||
</head>
|
||
<body>
|
||
|
||
<div class="topbar">
|
||
<span class="brand">Proxmox Monitor · Setup & Deploy</span>
|
||
<span class="counter"><span id="current">1</span> / <span id="total">1</span></span>
|
||
</div>
|
||
<div class="progress" id="progress" style="width:0%"></div>
|
||
<div class="footer-hint">← → or space to navigate · <kbd>f</kbd> fullscreen</div>
|
||
|
||
<div class="deck" id="deck">
|
||
|
||
<!-- 1 -->
|
||
<section class="slide title-slide">
|
||
<span class="eyebrow">Runbook</span>
|
||
<h1>Proxmox Monitor</h1>
|
||
<div class="subtitle">Setup & Deployment — Production Rollout</div>
|
||
<p class="muted" style="max-width:48rem">
|
||
Agent-server monitoring for Proxmox hosts. Elixir/OTP backend, Burrito-packaged agents,
|
||
Phoenix LiveView dashboard. This deck walks you from a clean environment to 20 hosts reporting,
|
||
in order, with verification at every step.
|
||
</p>
|
||
<div class="meta">
|
||
Reference: <code>SETUP-AND-DEPLOY.md</code> · ~2–3h end-to-end + host rollout time
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 2 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">What you're deploying</span>
|
||
<h1>Architecture</h1>
|
||
<h2>Two artifacts, independent pipelines, one dashboard</h2>
|
||
<div class="ascii">
|
||
┌─────────────────────────┐
|
||
│ Server (LXC in RZ) │
|
||
agents ──WSS──│ · Phoenix release │
|
||
│ · SQLite │
|
||
│ · Caddy (TLS) │
|
||
└─────────────────────────┘
|
||
▲
|
||
│ ssh
|
||
┌─────────────────────────┐
|
||
│ Operator workstation │
|
||
│ · builds server │
|
||
│ · builds agent binary │
|
||
└─────────────────────────┘
|
||
│ scp
|
||
▼
|
||
┌─────────────────────────┐
|
||
│ Proxmox host (1 of N) │
|
||
│ · Burrito binary │
|
||
│ · systemd unit │
|
||
└─────────────────────────┘
|
||
</div>
|
||
<p class="muted">Agents initiate outbound WSS — no inbound ports on Proxmox hosts.</p>
|
||
</section>
|
||
|
||
<!-- 3 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">Phases</span>
|
||
<h1>Roadmap for this deck</h1>
|
||
<ol>
|
||
<li><strong>Preflight</strong> — confirm prerequisites</li>
|
||
<li><strong>Local build</strong> — produce the two artifacts</li>
|
||
<li><strong>Server deploy</strong> — one-time LXC bring-up</li>
|
||
<li><strong>First agent</strong> — prove the pipeline end-to-end</li>
|
||
<li><strong>Test tier</strong> — 2–3 hosts for 24h</li>
|
||
<li><strong>Full rollout</strong> — the remaining fleet</li>
|
||
<li><strong>Rollback</strong> — because things go wrong</li>
|
||
<li><strong>Ongoing operations</strong> — upgrades, backups, rotation</li>
|
||
<li><strong>Go / No-Go</strong> — final sign-off</li>
|
||
</ol>
|
||
</section>
|
||
|
||
<!-- 4 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 1 Preflight</span>
|
||
<h1>Hardware & network</h1>
|
||
<div class="grid">
|
||
<div class="card">
|
||
<div class="label">Server LXC</div>
|
||
<div class="value">Debian 12 · 1 GB RAM · 2 cores · 10 GB</div>
|
||
<p class="muted" style="margin:.5rem 0 0 0">Unprivileged. Covers >20 agents comfortably.</p>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">DNS</div>
|
||
<div class="value">A record → public IP</div>
|
||
<p class="muted" style="margin:.5rem 0 0 0">Verify: <code>dig +short monitor.example.com</code></p>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">Inbound</div>
|
||
<div class="value">TCP 443 → server LXC</div>
|
||
<p class="muted" style="margin:.5rem 0 0 0">Caddy handles Let's Encrypt via HTTP-01.</p>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">Outbound</div>
|
||
<div class="value">HTTPS from every Proxmox host</div>
|
||
<p class="muted" style="margin:.5rem 0 0 0">No inbound port required on hosts.</p>
|
||
</div>
|
||
</div>
|
||
<p class="muted" style="margin-top:1rem">SSH root access: hypervisor + every Proxmox host.</p>
|
||
</section>
|
||
|
||
<!-- 5 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 1 Preflight</span>
|
||
<h1>Versions & tools</h1>
|
||
<div class="grid">
|
||
<div>
|
||
<h3>Proxmox fleet</h3>
|
||
<ul>
|
||
<li>VE <strong>8.3+</strong></li>
|
||
<li>OpenZFS <strong>2.3+</strong> (for <code>-j</code> JSON output)</li>
|
||
<li>Older hosts will report empty ZFS payloads</li>
|
||
</ul>
|
||
</div>
|
||
<div>
|
||
<h3>Build machine</h3>
|
||
<ul>
|
||
<li>Elixir <strong>1.19</strong> + OTP <strong>28</strong></li>
|
||
<li>Mix + Hex</li>
|
||
<li><strong>Docker</strong> daemon running (for Linux binaries)</li>
|
||
<li>SSH, scp, <code>sqlite3</code> (optional)</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
<div class="callout info">
|
||
No Docker? Run <code>./scripts/build-linux.sh</code> on the server LXC itself instead.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 6 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 1 Preflight</span>
|
||
<h1>Secrets plan</h1>
|
||
<h2>Three values — keep in a password manager, never in git</h2>
|
||
<table>
|
||
<thead><tr><th>Secret</th><th>How to generate</th></tr></thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>DASHBOARD_PASSWORD_HASH</code></td>
|
||
<td><code>mix run -e 'IO.puts(Argon2.hash_pwd_salt("&lt;pw&gt;"))'</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>SECRET_KEY_BASE</code></td>
|
||
<td><code>mix phx.gen.secret</code> (64-byte base64)</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Per-agent tokens</td>
|
||
<td>Admin UI → <em>Add host</em> reveals token once</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<div class="callout danger">
|
||
Tokens are shown <strong>once</strong>. Paste into your password manager before clicking away.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 7 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 2 Local build</span>
|
||
<h1>Tests first</h1>
|
||
<h2>If either suite is red, stop</h2>
|
||
<pre><code>cd server && mix deps.get && mix test
|
||
cd ../agent && mix deps.get && mix test</code></pre>
|
||
<div class="grid">
|
||
<div class="card">
|
||
<div class="label">Server</div>
|
||
<div class="value">58 tests, 0 failures</div>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">Agent</div>
|
||
<div class="value">23 tests, 0 failures</div>
|
||
</div>
|
||
</div>
|
||
<p class="muted">Never build a release from a branch with failing tests.</p>
|
||
</section>
|
||
|
||
<!-- 8 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 2 Local build</span>
|
||
<h1>Hash the password</h1>
|
||
<pre><code>cd server
|
||
mix run -e 'IO.puts(Argon2.hash_pwd_salt("your-password"))'</code></pre>
|
||
<p>Output looks like:</p>
|
||
<pre><code>$argon2id$v=19$m=65536,t=3,p=4$dSB9...$x0OQ...</code></pre>
|
||
<div class="callout">
|
||
Copy the whole <code>$argon2id$...</code> string into your password manager.
|
||
The plaintext password never leaves your head / password manager.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 9 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 2 Local build</span>
|
||
<h1>Server release</h1>
|
||
<pre><code>MIX_ENV=prod DASHBOARD_PASSWORD_HASH='placeholder' \
|
||
mix release --overwrite
|
||
|
||
tar -czf /tmp/server_release.tgz -C _build/prod/rel server
|
||
ls -lh /tmp/server_release.tgz</code></pre>
|
||
<p>Expected: ~30–60 MB tarball.</p>
|
||
<div class="callout info">
|
||
The <code>placeholder</code> hash only needs to exist so <code>config/runtime.exs</code>
|
||
accepts it. The real hash is supplied on the LXC at start time.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 10 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 2 Local build</span>
|
||
<h1>Agent binaries</h1>
|
||
<pre><code>cd ../agent
|
||
./scripts/build-linux.sh</code></pre>
|
||
<p>Expected output:</p>
|
||
<pre><code>Binaries written to /.../agent/dist:
|
||
proxmox-monitor-agent_linux_amd64
|
||
proxmox-monitor-agent_linux_arm64</code></pre>
|
||
<p>Sanity check:</p>
|
||
<pre><code>file dist/proxmox-monitor-agent_linux_amd64 | grep 'ELF 64-bit'</code></pre>
|
||
<p class="muted">First build: 5–10 min. Subsequent builds: seconds (Docker layer cache).</p>
|
||
</section>
|
||
|
||
<!-- 11 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Create the LXC</h1>
|
||
<p>On the hypervisor:</p>
|
||
<pre><code>pct create 200 \
|
||
/var/lib/vz/template/cache/debian-12-standard_12.7-1_amd64.tar.zst \
|
||
--hostname proxmox-monitor \
|
||
--memory 1024 --cores 2 \
|
||
--rootfs local-zfs:10 \
|
||
--net0 name=eth0,bridge=vmbr0,ip=dhcp \
|
||
--unprivileged 1 --features nesting=0 --onboot 1
|
||
|
||
pct start 200
|
||
pct exec 200 -- ip -4 addr show eth0 | grep -Po 'inet \K[\d.]+'</code></pre>
|
||
<p>Save the IP as <code>LXC_IP</code>. Typos here cost hours.</p>
|
||
</section>
|
||
|
||
<!-- 12 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Base packages</h1>
|
||
<p><code>pct enter 200</code> then:</p>
|
||
<pre><code>apt-get update
|
||
apt-get install -y ca-certificates curl gnupg \
|
||
debian-keyring debian-archive-keyring apt-transport-https sqlite3
|
||
|
||
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' | \
|
||
gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
|
||
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' \
|
||
> /etc/apt/sources.list.d/caddy-stable.list
|
||
|
||
apt-get update && apt-get install -y caddy
|
||
caddy version</code></pre>
|
||
</section>
|
||
|
||
<!-- 13 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Upload and extract the release</h1>
|
||
<p>From your workstation:</p>
|
||
<pre><code>scp /tmp/server_release.tgz root@$LXC_IP:/tmp/</code></pre>
|
||
<p>Inside the LXC:</p>
|
||
<pre><code>mkdir -p /opt/proxmox-monitor
|
||
tar -xzf /tmp/server_release.tgz -C /opt/proxmox-monitor
|
||
ls /opt/proxmox-monitor/server/bin/
|
||
# server migrate server.bat migrate.bat</code></pre>
|
||
</section>
|
||
|
||
<!-- 14 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Environment file</h1>
|
||
<pre><code>install -d -m 0700 /var/lib/proxmox-monitor
|
||
|
||
cat > /etc/default/proxmox-monitor <<'EOF'
|
||
DATABASE_PATH=/var/lib/proxmox-monitor/monitor.db
|
||
SECRET_KEY_BASE=&lt;paste-mix-phx.gen.secret-output&gt;
|
||
DASHBOARD_PASSWORD_HASH=&lt;paste-$argon2id$-hash&gt;
|
||
PHX_SERVER=true
|
||
PHX_HOST=monitor.example.com
|
||
PORT=4000
|
||
EOF
|
||
chmod 0600 /etc/default/proxmox-monitor</code></pre>
|
||
<div class="callout danger">
|
||
<strong>Single-quoted heredoc matters.</strong> A double-quoted one eats the <code>$</code>
|
||
characters in the Argon2 hash.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 15 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Migrate & systemd</h1>
|
||
<pre><code>set -a; . /etc/default/proxmox-monitor; set +a
|
||
/opt/proxmox-monitor/server/bin/server eval 'Server.Release.migrate()'
|
||
|
||
sqlite3 /var/lib/proxmox-monitor/monitor.db '.tables'
|
||
# hosts metrics schema_migrations</code></pre>
|
||
<p>Then install the systemd unit (see runbook §3.6) with:</p>
|
||
<pre><code>ExecStartPre=/opt/proxmox-monitor/server/bin/server eval 'Server.Release.migrate()'
|
||
ExecStart=/opt/proxmox-monitor/server/bin/server start
|
||
Restart=always
|
||
RestartSec=5</code></pre>
|
||
<pre><code>systemctl daemon-reload && systemctl enable --now proxmox-monitor</code></pre>
|
||
</section>
|
||
|
||
<!-- 16 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Caddy: TLS + WSS reverse-proxy</h1>
|
||
<pre><code>monitor.example.com {
|
||
reverse_proxy 127.0.0.1:4000 {
|
||
header_up X-Forwarded-Proto {scheme}
|
||
header_up X-Forwarded-For {remote_host}
|
||
transport http {
|
||
read_timeout 90s
|
||
dial_timeout 10s
|
||
}
|
||
}
|
||
}</code></pre>
|
||
<div class="callout danger">
|
||
<code>read_timeout 90s</code> is <strong>critical</strong>. Without it, every agent's
|
||
WebSocket is torn down every ~30s and the dashboard stays permanently offline-looking.
|
||
</div>
|
||
<pre><code>caddy validate --config /etc/caddy/Caddyfile
|
||
systemctl reload caddy</code></pre>
|
||
</section>
|
||
|
||
<!-- 17 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 3 Server deploy</span>
|
||
<h1>Server smoke test</h1>
|
||
<p>From anywhere on the internet:</p>
|
||
<pre><code>curl -s https://monitor.example.com/health</code></pre>
|
||
<p>Expected:</p>
|
||
<pre><code>{"db":"ok","status":"ok","version":"0.1.0"}</code></pre>
|
||
<p>Then browser:</p>
|
||
<ul>
|
||
<li>Open <code>https://monitor.example.com/</code> → redirects to <code>/login</code></li>
|
||
<li>Enter your dashboard password → lands on empty overview</li>
|
||
<li>"No hosts registered yet." is the expected empty state</li>
|
||
</ul>
|
||
<div class="callout">
|
||
Login loops on "Incorrect password"? <code>DASHBOARD_PASSWORD_HASH</code> was not pasted
|
||
correctly. Re-generate and redeploy §3.4.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 18 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 4 First agent</span>
|
||
<h1>Register in the admin UI</h1>
|
||
<ol>
|
||
<li>Browser → <code>/admin/hosts</code></li>
|
||
<li>"Register a new host" → enter <strong>short name</strong> (e.g. <code>pve-host-01</code>)</li>
|
||
<li>Click <strong>Add</strong></li>
|
||
<li>The page reveals a token — <strong>copy it now</strong></li>
|
||
</ol>
|
||
<div class="callout danger">
|
||
Tokens are shown <strong>exactly once</strong>. If you close the page without copying,
|
||
use <strong>Rotate</strong> to generate a new token and try again.
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 19 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 4 First agent</span>
|
||
<h1>Deploy binary + config</h1>
|
||
<pre><code>export HOST=pve-host-01
|
||
|
||
scp agent/dist/proxmox-monitor-agent_linux_amd64 \
|
||
root@$HOST:/usr/local/bin/proxmox-monitor-agent
|
||
ssh root@$HOST 'chmod 0755 /usr/local/bin/proxmox-monitor-agent'
|
||
|
||
scp agent/rel/proxmox-monitor-agent.service \
|
||
root@$HOST:/etc/systemd/system/</code></pre>
|
||
<p>On the host — write the TOML config:</p>
|
||
<pre><code>install -d -m 0700 /etc/proxmox-monitor /var/cache/proxmox-monitor-agent
|
||
|
||
cat > /etc/proxmox-monitor/agent.toml <<'EOF'
|
||
server_url = "wss://monitor.example.com/socket/websocket"
|
||
token = "&lt;paste-token-from-dashboard&gt;"
|
||
host_id = "pve-host-01"
|
||
|
||
[intervals]
|
||
fast_seconds = 30
|
||
medium_seconds = 300
|
||
slow_seconds = 1800
|
||
EOF
|
||
chmod 0600 /etc/proxmox-monitor/agent.toml</code></pre>
|
||
</section>
|
||
|
||
<!-- 20 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 4 First agent</span>
|
||
<h1>Enable and verify</h1>
|
||
<pre><code>systemctl daemon-reload
|
||
systemctl enable --now proxmox-monitor-agent
|
||
journalctl -u proxmox-monitor-agent -f</code></pre>
|
||
<p>Expected within 10s:</p>
|
||
<pre><code>agent: starting with host_id=pve-host-01
|
||
reporter: connected, joining host:pve-host-01
|
||
reporter: joined host:pve-host-01</code></pre>
|
||
<p>Reload the dashboard — the card should be <strong>online</strong> (green border)
|
||
with Load / RAM / Pools / VMs populated.</p>
|
||
</section>
|
||
|
||
<!-- 21 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 4 First agent</span>
|
||
<h1>Verify the offline flip</h1>
|
||
<div class="grid">
|
||
<div>
|
||
<h3>Test</h3>
|
||
<pre><code>ssh root@$HOST \
|
||
'systemctl stop proxmox-monitor-agent'</code></pre>
|
||
<p>Dashboard card turns grey within ~1s.</p>
|
||
<pre><code>ssh root@$HOST \
|
||
'systemctl start proxmox-monitor-agent'</code></pre>
|
||
<p>Green again within 30s.</p>
|
||
</div>
|
||
<div>
|
||
<h3>If the card stays green</h3>
|
||
<p>Channel <code>terminate</code> callback didn't run — usually Caddy.</p>
|
||
<ul>
|
||
<li>Check <code>/etc/caddy/Caddyfile</code> has <code>read_timeout 90s</code></li>
|
||
<li><code>systemctl reload caddy</code> after fixing</li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 22 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 5 Test tier</span>
|
||
<h1>2–3 hosts for 24h</h1>
|
||
<p>Pick non-critical hosts, or hosts with independent monitoring to fall back on.</p>
|
||
<p>What to look for overnight:</p>
|
||
<ul>
|
||
<li>All cards remain <strong>online</strong></li>
|
||
<li>No repeating <code>[error]</code> lines in server log</li>
|
||
<li>Retention log line appears: <code>retention: pruned N stale samples</code> (starts firing after 48h)</li>
|
||
</ul>
|
||
<h3>Tests to actively run</h3>
|
||
<ul>
|
||
<li>Reboot a Proxmox host → card goes offline, returns without intervention</li>
|
||
<li><code>systemctl restart proxmox-monitor</code> on the server → all agents flip offline, then green within 30s. No stuck agents.</li>
|
||
</ul>
|
||
</section>
|
||
|
||
<!-- 23 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 5 Test tier</span>
|
||
<h1>Go / No-Go gate</h1>
|
||
<h2>Do NOT proceed to full rollout unless ALL are true for 24h</h2>
|
||
<div class="check"><div class="box"></div>All test-tier hosts show <strong>online</strong> continuously</div>
|
||
<div class="check"><div class="box"></div>No repeating error lines in server logs</div>
|
||
<div class="check"><div class="box"></div>Retention has pruned at least one row</div>
|
||
<div class="check"><div class="box"></div>Token rotation + restart behaves as designed</div>
|
||
<div class="check"><div class="box"></div>Server-reboot drill: all agents recover without intervention</div>
|
||
<div class="check"><div class="box"></div>Dashboard is responsive (<1s LiveView updates)</div>
|
||
</section>
|
||
|
||
<!-- 24 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 6 Full rollout</span>
|
||
<h1>Batch loop</h1>
|
||
<p>After 3–4 hosts by hand, batch:</p>
|
||
<pre><code>for HOST in pve-host-04 pve-host-05 pve-host-06; do
|
||
echo "Register $HOST in admin UI, paste token:"
|
||
read -s TOKEN
|
||
|
||
scp agent/dist/proxmox-monitor-agent_linux_amd64 \
|
||
root@$HOST:/usr/local/bin/proxmox-monitor-agent
|
||
scp agent/rel/proxmox-monitor-agent.service \
|
||
root@$HOST:/etc/systemd/system/
|
||
|
||
ssh root@$HOST "chmod 0755 /usr/local/bin/proxmox-monitor-agent &&
|
||
install -d -m 0700 /etc/proxmox-monitor /var/cache/proxmox-monitor-agent &&
|
||
cat > /etc/proxmox-monitor/agent.toml &lt;&lt;EOF
|
||
server_url = \"wss://monitor.example.com/socket/websocket\"
|
||
token = \"$TOKEN\"
|
||
host_id = \"$HOST\"
|
||
EOF
|
||
chmod 0600 /etc/proxmox-monitor/agent.toml &&
|
||
systemctl daemon-reload &&
|
||
systemctl enable --now proxmox-monitor-agent"
|
||
done</code></pre>
|
||
<p class="muted">After each batch of ~5: spot-check cards, filter for offline, open a random host detail.</p>
|
||
</section>
|
||
|
||
<!-- 25 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 7 Rollback</span>
|
||
<h1>Four escape hatches</h1>
|
||
<div class="grid">
|
||
<div class="card">
|
||
<div class="label">One agent</div>
|
||
<pre style="margin-top:.5rem"><code>ssh root@$HOST \
|
||
'systemctl disable --now proxmox-monitor-agent'</code></pre>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">Whole service</div>
|
||
<pre style="margin-top:.5rem"><code>systemctl stop proxmox-monitor
|
||
systemctl stop caddy</code></pre>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">Previous release</div>
|
||
<pre style="margin-top:.5rem"><code>systemctl stop proxmox-monitor
|
||
rm -rf /opt/proxmox-monitor/server
|
||
tar -xzf /tmp/server_release_PREV.tgz \
|
||
-C /opt/proxmox-monitor
|
||
systemctl start proxmox-monitor</code></pre>
|
||
</div>
|
||
<div class="card">
|
||
<div class="label">Restore DB</div>
|
||
<pre style="margin-top:.5rem"><code>systemctl stop proxmox-monitor
|
||
cp /var/backups/proxmox-monitor/monitor-YYYY-MM-DD.db \
|
||
/var/lib/proxmox-monitor/monitor.db
|
||
systemctl start proxmox-monitor</code></pre>
|
||
</div>
|
||
</div>
|
||
<p class="muted">Tokens survive DB restores. Metrics post-backup are lost (48h max by retention policy).</p>
|
||
</section>
|
||
|
||
<!-- 26 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 8 Ongoing ops</span>
|
||
<h1>Upgrades</h1>
|
||
<div class="grid">
|
||
<div>
|
||
<h3>Server</h3>
|
||
<pre><code>cd server
|
||
MIX_ENV=prod DASHBOARD_PASSWORD_HASH='placeholder' \
|
||
mix release --overwrite
|
||
tar -czf /tmp/server_release.tgz -C _build/prod/rel server
|
||
scp /tmp/server_release.tgz root@$LXC:/tmp/
|
||
|
||
ssh root@$LXC '
|
||
systemctl stop proxmox-monitor
|
||
mv /opt/proxmox-monitor/server{,.old}
|
||
tar -xzf /tmp/server_release.tgz -C /opt/proxmox-monitor
|
||
systemctl start proxmox-monitor # ExecStartPre runs migrate
|
||
'</code></pre>
|
||
<p class="muted">Verify <code>/health</code> then delete <code>server.old</code>.</p>
|
||
</div>
|
||
<div>
|
||
<h3>Agent</h3>
|
||
<pre><code>scp agent/dist/proxmox-monitor-agent_linux_amd64 \
|
||
root@$HOST:/usr/local/bin/proxmox-monitor-agent.new
|
||
|
||
ssh root@$HOST '
|
||
mv /usr/local/bin/proxmox-monitor-agent{.new,}
|
||
systemctl restart proxmox-monitor-agent
|
||
'</code></pre>
|
||
<p class="muted">No DB on the host, so agent upgrades are trivially atomic.</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 27 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 8 Ongoing ops</span>
|
||
<h1>SQLite backups</h1>
|
||
<p>Install as a cron inside the LXC — keeps 30 daily snapshots:</p>
|
||
<pre><code>cat > /etc/cron.d/proxmox-monitor-backup <<'EOF'
|
||
30 3 * * * root install -d -m 0700 /var/backups/proxmox-monitor && sqlite3 /var/lib/proxmox-monitor/monitor.db ".backup /var/backups/proxmox-monitor/monitor-$(date +\%Y-\%m-\%d).db" && find /var/backups/proxmox-monitor -name 'monitor-*.db' -mtime +30 -delete
|
||
EOF</code></pre>
|
||
<p>SQLite's online-backup command is safe while the server is running.</p>
|
||
<p class="muted">Verify at least one run before declaring the rollout complete.</p>
|
||
</section>
|
||
|
||
<!-- 28 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 9 Sign-off</span>
|
||
<h1>Production readiness</h1>
|
||
<div class="check"><div class="box"></div><code>/health</code> returns 200 with <code>status:ok</code></div>
|
||
<div class="check"><div class="box"></div>External uptime monitor configured and green</div>
|
||
<div class="check"><div class="box"></div>All intended Proxmox hosts on overview, all <strong>online</strong></div>
|
||
<div class="check"><div class="box"></div>≥1 full 48h retention cycle observed (pruning log present)</div>
|
||
<div class="check"><div class="box"></div>SQLite backup cron installed and yesterday's file exists</div>
|
||
<div class="check"><div class="box"></div>You have rolled back once <strong>on purpose</strong> (drill)</div>
|
||
</section>
|
||
|
||
<!-- 29 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">§ 9 Sign-off</span>
|
||
<h1>Secrets hygiene</h1>
|
||
<div class="check"><div class="box"></div>Dashboard password in a password manager, not a text file</div>
|
||
<div class="check"><div class="box"></div><code>SECRET_KEY_BASE</code> in a password manager</div>
|
||
<div class="check"><div class="box"></div><code>/etc/default/proxmox-monitor</code> is <code>0600 root:root</code></div>
|
||
<div class="check"><div class="box"></div><code>/etc/proxmox-monitor/agent.toml</code> is <code>0600 root:root</code> on every host</div>
|
||
<div class="check"><div class="box"></div>You can rotate an agent token in <2 minutes</div>
|
||
<div class="check"><div class="box"></div>A teammate has been walked through one agent install and one token rotation live</div>
|
||
</section>
|
||
|
||
<!-- 30 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">Appendix A</span>
|
||
<h1>Common errors</h1>
|
||
<table>
|
||
<thead><tr><th>Symptom</th><th>First thing to check</th></tr></thead>
|
||
<tbody>
|
||
<tr><td><code>CERT_AUTHORITY_INVALID</code> in browser</td><td>Caddy hasn't finished LE issuance. Wait 60s. <code>journalctl -u caddy</code>.</td></tr>
|
||
<tr><td>Login loops on correct password</td><td><code>DASHBOARD_PASSWORD_HASH</code> mismatch. Regenerate and redeploy.</td></tr>
|
||
<tr><td>Card stays offline after agent restart</td><td>Wrong token or <code>unknown_host</code>. Check agent journal.</td></tr>
|
||
<tr><td>All agents reconnect every ~30s</td><td>Caddy <code>read_timeout</code> missing or too short.</td></tr>
|
||
<tr><td><code>/health</code> returns 503</td><td>Process up but DB unreadable. Check permissions + <code>DATABASE_PATH</code>.</td></tr>
|
||
<tr><td>LXC can't bind port 4000</td><td>Another process owns it. <code>ss -ltnp | grep 4000</code>.</td></tr>
|
||
<tr><td>Agent logs <code>{:enoent, "pvesh"}</code></td><td>Not a Proxmox host, or empty <code>$PATH</code> under systemd.</td></tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
|
||
<!-- 31 -->
|
||
<section class="slide">
|
||
<span class="eyebrow">Appendix B</span>
|
||
<h1>File & port cheat sheet</h1>
|
||
<div class="grid">
|
||
<div>
|
||
<h3>Server LXC</h3>
|
||
<pre><code>/opt/proxmox-monitor/server/ release tree
|
||
/etc/default/proxmox-monitor env secrets, 0600
|
||
/etc/systemd/system/proxmox-monitor.service
|
||
/etc/caddy/Caddyfile
|
||
/var/lib/proxmox-monitor/monitor.db
|
||
/var/backups/proxmox-monitor/ daily backups
|
||
|
||
tcp 443 (caddy) → tcp 127.0.0.1:4000 (phoenix)</code></pre>
|
||
</div>
|
||
<div>
|
||
<h3>Proxmox host (per agent)</h3>
|
||
<pre><code>/usr/local/bin/proxmox-monitor-agent
|
||
/etc/proxmox-monitor/agent.toml token, 0600
|
||
/etc/systemd/system/proxmox-monitor-agent.service
|
||
/var/cache/proxmox-monitor-agent/ Burrito unpack
|
||
|
||
no listening ports</code></pre>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<!-- 32 -->
|
||
<section class="slide end-slide">
|
||
<span class="eyebrow">Done</span>
|
||
<h1>MVP in production</h1>
|
||
<p class="muted" style="max-width:40rem">
|
||
All four phases from the concept shipped: monitoring skeleton, ZFS/VM/storage collectors,
|
||
LiveView dashboard, packaged binaries. The operator has the runbook; agents report; retention
|
||
prunes; backups run. Everything else is iteration.
|
||
</p>
|
||
<p class="muted" style="margin-top:2rem;font-size:.85rem">
|
||
Full runbook: <code>SETUP-AND-DEPLOY.md</code> · Concept: <code>proxmox-monitor-konzept.md</code>
|
||
</p>
|
||
</section>
|
||
|
||
</div>
|
||
|
||
<script>
|
||
(function () {
|
||
// Every element with class "slide", in document order; the deck size is
// written into the #total counter once.
const slides = [...document.querySelectorAll('.slide')];
const total = slides.length;
document.getElementById('total').textContent = total;

// Zero-based index of the visible slide. A "#s=N" hash (1-based) selects the
// starting slide; a missing or non-numeric value parses to NaN, fails both
// range comparisons, and leaves the deck on the first slide.
let current = 0;
const requested = parseInt(
  new URLSearchParams(location.hash.replace(/^#/, '?')).get('s'),
  10
);
if (requested >= 1 && requested <= total) {
  current = requested - 1;
}
|
||
|
||
/**
 * Show the slide at index `current` and hide all others, then sync the
 * #current counter, the #progress bar width and the URL hash ("s=N",
 * 1-based), and scroll the shown slide back to its top.
 */
function render() {
  const position = current + 1;

  for (const [index, slide] of slides.entries()) {
    slide.classList.toggle('active', index === current);
  }

  document.getElementById('current').textContent = position;
  document.getElementById('progress').style.width = `${position / total * 100}%`;
  location.hash = `s=${position}`;
  slides[current].scrollTop = 0;
}
|
||
|
||
/**
 * Move `delta` slides forward (positive) or backward (negative), clamped to
 * the first and last slide.
 *
 * Does nothing when the clamped target is the slide already shown, so a click
 * or key press at either end of the deck does not re-run render() and reset
 * that slide's scroll position.
 *
 * @param {number} delta - Signed number of slides to move.
 */
function go(delta) {
  const target = Math.max(0, Math.min(total - 1, current + delta));
  if (target === current) return;
  current = target;
  render();
}
|
||
|
||
// Keyboard navigation:
//   ArrowRight / PageDown / Space  -> next slide
//   ArrowLeft / PageUp             -> previous slide
//   Home / End                     -> first / last slide
//   1-9                            -> jump to that slide number
//   f / F                          -> toggle fullscreen
document.addEventListener('keydown', (e) => {
  // Leave typing in form fields alone.
  if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;

  // Leave browser/OS shortcuts alone: without this guard Alt+ArrowLeft
  // (history back) would be swallowed by preventDefault, Ctrl/Cmd+F would
  // toggle fullscreen, and Cmd/Ctrl+digit would jump slides.
  if (e.ctrlKey || e.metaKey || e.altKey) return;

  switch (e.key) {
    case 'ArrowRight':
    case 'PageDown':
    case ' ':
      e.preventDefault();
      go(1);
      break;
    case 'ArrowLeft':
    case 'PageUp':
      e.preventDefault();
      go(-1);
      break;
    case 'Home':
      e.preventDefault();
      current = 0;
      render();
      break;
    case 'End':
      e.preventDefault();
      current = total - 1;
      render();
      break;
    case 'f':
    case 'F':
      // Optional calls: the Fullscreen API may be missing in some browsers.
      if (!document.fullscreenElement) {
        document.documentElement.requestFullscreen?.();
      } else {
        document.exitFullscreen?.();
      }
      break;
    default:
      if (/^[0-9]$/.test(e.key)) {
        // A single digit is at most 9, so only the lower bound and the deck
        // size need checking; "0" is ignored.
        const n = parseInt(e.key, 10);
        if (n >= 1 && n <= total) {
          current = n - 1;
          render();
        }
      }
  }
});
|
||
|
||
// A click advances one slide unless it lands on (or inside) a link, code
// sample, form control, button or the top bar.
document.addEventListener('click', ({ target }) => {
  const exempt = target.closest('a, code, pre, input, button, .topbar') !== null;
  if (!exempt) {
    go(1);
  }
});
|
||
|
||
// Swipe: a single-finger drag that is mostly horizontal and longer than 40px
// moves one slide (drag left -> next, drag right -> previous).
let touchStartX = null;
let touchStartY = null;

// Forget any gesture in progress.
function resetSwipe() {
  touchStartX = null;
  touchStartY = null;
}

document.addEventListener('touchstart', (e) => {
  if (e.touches.length === 1) {
    touchStartX = e.touches[0].clientX;
    touchStartY = e.touches[0].clientY;
  } else {
    // A second finger means pinch/zoom rather than a swipe; drop the pending
    // start point so lifting a finger cannot flip the slide.
    resetSwipe();
  }
});

document.addEventListener('touchend', (e) => {
  if (touchStartX === null) return;

  const dx = e.changedTouches[0].clientX - touchStartX;
  const dy = e.changedTouches[0].clientY - touchStartY;
  resetSwipe();

  // Require horizontal dominance so vertically scrolling an overflowing slide
  // with some sideways drift does not change slides.
  if (Math.abs(dx) > 40 && Math.abs(dx) > Math.abs(dy)) {
    go(dx < 0 ? 1 : -1);
  }
});

// An interrupted touch must not leave a stale start point behind.
document.addEventListener('touchcancel', resetSwipe);
|
||
|
||
// Follow changes to the URL hash: a valid "#s=N" (1-based) that differs from
// the slide already shown switches to it. The inequality check also keeps
// render()'s own hash write from triggering a second render.
window.addEventListener('hashchange', () => {
  const query = new URLSearchParams(location.hash.replace(/^#/, '?'));
  // NaN (missing or non-numeric value) fails both range comparisons below.
  const target = parseInt(query.get('s'), 10) - 1;
  const inRange = target >= 0 && target < total;
  if (!inRange || target === current) return;

  current = target;
  render();
});

// Initial paint of the starting slide.
render();
|
||
})();
|
||
</script>
|
||
|
||
</body>
|
||
</html>
|