proxMon/agent/lib/proxmox_agent/collectors/host.ex

106 lines
2.8 KiB
Elixir

defmodule ProxmoxAgent.Collectors.Host do
  @moduledoc """
  Reads host metrics from /proc. Accepts `proc_dir:` option for testability.

  Never raises — when reading or parsing one of the source files fails, a
  `{source, detail}` tuple is added to `:errors` and every field derived from
  that file is left `nil`. `source` is one of `:loadavg`, `:meminfo` or
  `:uptime`.
  """

  @type sample :: %{
          hostname: String.t(),
          load1: float() | nil,
          load5: float() | nil,
          load15: float() | nil,
          mem_total_bytes: non_neg_integer() | nil,
          mem_available_bytes: non_neg_integer() | nil,
          mem_used_bytes: non_neg_integer() | nil,
          uptime_seconds: non_neg_integer() | nil,
          errors: [term()]
        }

  @doc """
  Collects a single sample of host metrics.

  ## Options

    * `:proc_dir` - directory containing the `loadavg`, `meminfo` and `uptime`
      files. Defaults to `"/proc"`.

  Each file is read independently, so a failure on one of them does not
  prevent the others from being reported. `mem_used_bytes` is
  `mem_total_bytes - mem_available_bytes` and is `nil` when either operand is
  `nil`.
  """
  @spec collect(keyword()) :: sample()
  def collect(opts \\ []) do
    proc_dir = Keyword.get(opts, :proc_dir, "/proc")

    {{load1, load5, load15}, load_error} =
      safe(fn -> read_loadavg(proc_dir) end, {nil, nil, nil}, :loadavg)

    {%{total: total, available: available}, mem_error} =
      safe(fn -> read_meminfo(proc_dir) end, %{total: nil, available: nil}, :meminfo)

    {uptime, uptime_error} = safe(fn -> read_uptime(proc_dir) end, nil, :uptime)

    %{
      hostname: hostname(),
      load1: load1,
      load5: load5,
      load15: load15,
      mem_total_bytes: total,
      mem_available_bytes: available,
      mem_used_bytes: if(total && available, do: total - available, else: nil),
      uptime_seconds: uptime,
      errors: Enum.reject([load_error, mem_error, uptime_error], &is_nil/1)
    }
  end

  # Runs `fun` and returns `{result, nil}`. On any failure returns
  # `{fallback, {tag, detail}}` instead of propagating it.
  #
  # `rescue` already covers every `:error`-class raise (Erlang errors are
  # normalized into exceptions), so the `catch` clause only ever sees throws
  # and exits; those are reported as `{tag, {kind, reason}}`.
  defp safe(fun, fallback, tag) do
    {fun.(), nil}
  rescue
    exception -> {fallback, {tag, Exception.message(exception)}}
  catch
    kind, reason -> {fallback, {tag, {kind, reason}}}
  end

  # Parses the first three whitespace-separated fields of `loadavg` as floats.
  # Raises if the file is unreadable or has fewer than three parsable fields.
  defp read_loadavg(proc_dir) do
    [one, five, fifteen | _] =
      proc_dir
      |> Path.join("loadavg")
      |> File.read!()
      |> String.split(~r/\s+/, trim: true)

    {to_float(one), to_float(five), to_float(fifteen)}
  end

  # Parses `meminfo` into a `"Key" => "value"` map and extracts `MemTotal` and
  # `MemAvailable`, converted from kB to bytes. A missing or unparsable key
  # yields `nil` for that field without raising, so no error entry is
  # recorded for it.
  defp read_meminfo(proc_dir) do
    body = File.read!(Path.join(proc_dir, "meminfo"))

    # Lines that do not split into exactly `key: value` are skipped by the
    # pattern in the second generator.
    fields =
      for line <- String.split(body, "\n", trim: true),
          [key, value] <- [String.split(line, ~r/:\s+/, parts: 2)],
          into: %{},
          do: {key, value}

    %{
      total: kb_to_bytes(fields["MemTotal"]),
      available: kb_to_bytes(fields["MemAvailable"])
    }
  end

  # Returns the first space-separated field of `uptime`, truncated to an
  # integer. Raises if the file is unreadable, empty or not numeric.
  defp read_uptime(proc_dir) do
    [seconds | _] =
      proc_dir
      |> Path.join("uptime")
      |> File.read!()
      |> String.split(" ", trim: true)

    seconds |> to_float() |> trunc()
  end

  # Converts a meminfo value such as "16384 kB" to bytes; `nil` when the value
  # is absent or does not contain a "<digits> kB" quantity.
  defp kb_to_bytes(nil), do: nil

  defp kb_to_bytes(value) do
    case Regex.run(~r/(\d+)\s*kB/, value) do
      [_, kilobytes] -> String.to_integer(kilobytes) * 1024
      _ -> nil
    end
  end

  # Assertive parse: a non-numeric string makes the match fail, which `safe/3`
  # turns into an error entry for the file being read.
  defp to_float(string) do
    {float, _rest} = Float.parse(string)
    float
  end

  # Local hostname as a binary, or "unknown-host" if the lookup does not
  # return `{:ok, name}`.
  defp hostname do
    case :inet.gethostname() do
      {:ok, name} -> List.to_string(name)
      _ -> "unknown-host"
    end
  end
end