feat(server): pure Status.compute/2 for ok/warning/critical/offline

This commit is contained in:
Carsten 2026-04-21 22:49:15 +02:00
parent 9c457c1f68
commit f3e7fab4d2
2 changed files with 102 additions and 0 deletions

View file

@ -0,0 +1,41 @@
defmodule Server.Status do
  @moduledoc """
  Derive a status level for a host from its latest fast sample.

    * `:offline`  - host has no active agent connection (host status is
      `"offline"` or `"never_connected"`)
    * `:critical` - a pool is DEGRADED/FAULTED/SUSPENDED/UNAVAIL, or its
      capacity is above 90
    * `:warning`  - a pool's capacity is within 80..90, or OS updates are
      pending
    * `:ok`       - everything nominal, including when no sample exists yet
  """

  @offline_host_states ~w(offline never_connected)
  @bad_pool_states ~w(DEGRADED FAULTED SUSPENDED UNAVAIL)
  @warning_capacity 80
  @critical_capacity 90

  @doc """
  Computes the status level for a host.

  `host_status` is the host's connection state string and `payload` is the
  latest sample as a string-keyed map, or `nil` when no sample is available.

  Pools are read from `payload["zfs_pools"]["pools"]` and the pending-update
  count from `payload["system_info"]["pending_updates"]`. Missing keys are
  treated as "no pools" and "no pending updates".

  Capacity and pending-update values may be numbers or numeric strings such
  as `"85"` or `"85%"`. Any other value counts as `0` so that it can never
  trip a threshold by accident.

  The checks are ordered by severity: an offline host wins over everything,
  then any critical pool, then any warning condition.

  ## Examples

      iex> Server.Status.compute("offline", nil)
      :offline

      iex> Server.Status.compute("online", nil)
      :ok

      iex> pool = %{"health" => "ONLINE", "capacity_percent" => 95}
      iex> Server.Status.compute("online", %{"zfs_pools" => %{"pools" => [pool]}})
      :critical
  """
  @spec compute(String.t(), map() | nil) :: :offline | :critical | :warning | :ok
  def compute(host_status, _payload) when host_status in @offline_host_states, do: :offline

  # Connected host without a sample yet: nothing to complain about.
  def compute(_host_status, nil), do: :ok

  def compute(_host_status, %{} = payload) do
    pools = get_in(payload, ["zfs_pools", "pools"]) || []
    pending = to_number(get_in(payload, ["system_info", "pending_updates"]))

    cond do
      Enum.any?(pools, &critical_pool?/1) -> :critical
      Enum.any?(pools, &warning_pool?/1) -> :warning
      pending > 0 -> :warning
      true -> :ok
    end
  end

  # A pool is critical when its health is one of the bad states or it is
  # filled beyond the critical threshold.
  defp critical_pool?(pool) do
    pool["health"] in @bad_pool_states or capacity(pool) > @critical_capacity
  end

  # A pool is in the warning band when its capacity lies between the warning
  # and critical thresholds, both inclusive.
  defp warning_pool?(pool) do
    cap = capacity(pool)
    cap >= @warning_capacity and cap <= @critical_capacity
  end

  # Capacity of a pool as a number; a missing value counts as 0.
  defp capacity(pool), do: to_number(pool["capacity_percent"])

  # Erlang term ordering sorts every binary, list and map above every number,
  # so comparing a raw non-numeric value against a threshold is always true.
  # Normalise to a number before any comparison.
  defp to_number(value) when is_number(value), do: value

  defp to_number(value) when is_binary(value) do
    case value |> String.trim() |> Float.parse() do
      {number, _rest} -> number
      :error -> 0
    end
  end

  defp to_number(_value), do: 0
end

View file

@ -0,0 +1,61 @@
defmodule Server.StatusTest do
  use ExUnit.Case, async: true

  alias Server.Status

  describe "compute/2" do
    test "returns :offline when host status is offline, regardless of payload" do
      assert Status.compute("offline", pools_payload([healthy_pool()])) == :offline
    end

    test "returns :ok with all-healthy payload" do
      payload = %{
        "zfs_pools" => %{"pools" => [healthy_pool()]},
        "system_info" => %{"pending_updates" => 0}
      }

      assert Status.compute("online", payload) == :ok
    end

    test "returns :ok for an empty payload" do
      assert Status.compute("online", %{}) == :ok
    end

    test "returns :critical for degraded pool" do
      payload = pools_payload([pool_with("health", "DEGRADED")])
      assert Status.compute("online", payload) == :critical
    end

    test "returns :critical for every other bad pool health state" do
      for health <- ~w(FAULTED SUSPENDED UNAVAIL) do
        payload = pools_payload([pool_with("health", health)])
        assert Status.compute("online", payload) == :critical
      end
    end

    test "returns :critical for pool capacity > 90" do
      payload = pools_payload([pool_with("capacity_percent", 95)])
      assert Status.compute("online", payload) == :critical
    end

    test "returns :warning for pool capacity 80..90" do
      payload = pools_payload([pool_with("capacity_percent", 85)])
      assert Status.compute("online", payload) == :warning
    end

    # Pin both edges of the warning band so an off-by-one in the thresholds
    # is caught.
    test "treats capacity thresholds as 79 ok, 80 and 90 warning, 91 critical" do
      expectations = [{79, :ok}, {80, :warning}, {90, :warning}, {91, :critical}]

      for {capacity, expected} <- expectations do
        payload = pools_payload([pool_with("capacity_percent", capacity)])
        assert Status.compute("online", payload) == expected
      end
    end

    test "returns :warning for pending OS updates > 0" do
      payload = %{
        "zfs_pools" => %{"pools" => [healthy_pool()]},
        "system_info" => %{"pending_updates" => 3}
      }

      assert Status.compute("online", payload) == :warning
    end

    test "prefers :critical over :warning when both conditions are present" do
      payload = %{
        "zfs_pools" => %{
          "pools" => [pool_with("capacity_percent", 85), pool_with("health", "DEGRADED")]
        },
        "system_info" => %{"pending_updates" => 3}
      }

      assert Status.compute("online", payload) == :critical
    end

    test "returns :ok when payload is nil (never-seen host) but host is online" do
      assert Status.compute("online", nil) == :ok
    end

    test "treats never_connected like offline" do
      assert Status.compute("never_connected", nil) == :offline
    end

    test "treats never_connected like offline even with a payload" do
      assert Status.compute("never_connected", pools_payload([healthy_pool()])) == :offline
    end
  end

  # Baseline pool that triggers neither a warning nor a critical status.
  defp healthy_pool do
    %{
      "name" => "rpool",
      "health" => "ONLINE",
      "capacity_percent" => 40
    }
  end

  # Healthy pool with a single field overridden.
  defp pool_with(key, value), do: Map.put(healthy_pool(), key, value)

  # Minimal payload carrying only the given pools.
  defp pools_payload(pools), do: %{"zfs_pools" => %{"pools" => pools}}
end