feat(agent): add Diagnostics module with no-op default

This commit is contained in:
Carsten 2026-04-22 22:16:55 +02:00
parent 1c289a5a0d
commit c72eed1307
2 changed files with 128 additions and 0 deletions

View file

@@ -0,0 +1,64 @@
defmodule ProxmoxAgent.Diagnostics do
  @moduledoc """
  Opt-in diagnostic dump of external commands and outgoing samples.

  The feature is switched by the `:dump_dir` key in the `:agent` application
  env. While that key is unset or `nil`, `log_command/4` and `log_sample/2`
  return `:ok` without sending anything. Once a directory has been configured,
  each call is forwarded as a cast to the process registered under
  `ProxmoxAgent.Diagnostics.Writer`, provided such a process is registered.

  The writer lives outside this module; it is expected to serialize appends to
  `commands.log` and `samples.log` inside the dump directory.
  """

  require Logger

  # Application env coordinates of the on/off switch.
  @app :agent
  @env_key :dump_dir

  # Registered name of the process that receives the casts.
  @writer __MODULE__.Writer

  @typedoc "Outcome of an external command, as handed to `log_command/4`."
  @type command_result :: {:ok, String.t()} | {:error, term()}

  @doc """
  Enables dumping into `dir`, or disables it.

  `nil` and `""` clear the `:dump_dir` key. Any other binary is created with
  `File.mkdir_p/1`; on success the path is stored under `:dump_dir`, on
  failure a warning is logged and dumping is disabled. Always returns `:ok`.
  """
  @spec configure(String.t() | nil) :: :ok
  def configure(dir) when dir in [nil, ""], do: disable()

  def configure(dir) when is_binary(dir) do
    case File.mkdir_p(dir) do
      {:error, reason} ->
        Logger.warning("diagnostics: mkdir_p #{dir} failed (#{inspect(reason)}); dumping disabled")
        disable()

      :ok ->
        # Application.put_env/3 returns :ok, which is also our return value.
        Application.put_env(@app, @env_key, dir)
    end
  end

  @doc """
  Returns `true` when a non-nil `:dump_dir` is present in the application env.
  """
  @spec enabled?() :: boolean()
  def enabled?, do: dump_dir() != nil

  @doc """
  Returns the configured dump directory, or `nil` when none is set.
  """
  @spec dump_dir() :: String.t() | nil
  def dump_dir, do: Application.get_env(@app, @env_key)

  @doc """
  Reports one external command invocation.

  Sends `{:command, cmd, args, result, duration_us}` to the writer when dumping
  is enabled and the writer is registered; otherwise does nothing. Always
  returns `:ok`. `duration_us` is presumably in microseconds, judging by its
  name.
  """
  @spec log_command(String.t(), [String.t()], command_result(), non_neg_integer()) :: :ok
  def log_command(cmd, args, result, duration_us),
    do: cast({:command, cmd, args, result, duration_us})

  @doc """
  Reports one outgoing sample.

  Sends `{:sample, kind, payload}` to the writer when dumping is enabled and
  the writer is registered; otherwise does nothing. Always returns `:ok`.
  Only accepts a binary `kind` and a map `payload`.
  """
  @spec log_sample(String.t(), map()) :: :ok
  def log_sample(kind, payload) when is_binary(kind) and is_map(payload),
    do: cast({:sample, kind, payload})

  # Clears the switch. Application.delete_env/2 returns :ok.
  defp disable, do: Application.delete_env(@app, @env_key)

  # Forwards `msg` to the writer only when dumping is enabled AND a process is
  # registered under the writer name; every other path is a silent no-op.
  defp cast(msg) do
    with true <- enabled?(),
         writer when not is_nil(writer) <- Process.whereis(@writer) do
      GenServer.cast(writer, msg)
    end

    :ok
  end
end

View file

@@ -0,0 +1,64 @@
defmodule ProxmoxAgent.DiagnosticsTest do
  # These tests mutate the global :agent application env, so they run
  # synchronously.
  use ExUnit.Case, async: false

  alias ProxmoxAgent.Diagnostics

  setup do
    # Start every test from a clean slate and leave one behind afterwards.
    reset_dump_dir()
    on_exit(fn -> reset_dump_dir() end)
    :ok
  end

  describe "configure/1 and enabled?/0" do
    test "nil disables and returns :ok" do
      assert Diagnostics.configure(nil) == :ok
      refute Diagnostics.enabled?()
    end

    test "empty string disables and returns :ok" do
      assert Diagnostics.configure("") == :ok
      refute Diagnostics.enabled?()
    end

    test "valid path creates the directory and enables" do
      dump_path = scratch_path("diag")

      assert Diagnostics.configure(dump_path) == :ok
      assert Diagnostics.enabled?()
      assert File.dir?(dump_path)
      assert Application.get_env(:agent, :dump_dir) == dump_path
    end

    test "unreachable path disables (does not crash)" do
      # A regular file cannot have children, so mkdir_p on a path below it
      # is guaranteed to fail.
      blocker = scratch_path("diag-parent")
      File.write!(blocker, "not a directory")

      assert Diagnostics.configure(Path.join(blocker, "child")) == :ok
      refute Diagnostics.enabled?()
    end
  end

  describe "log_command/4 and log_sample/2 (no writer running)" do
    test "log_command/4 no-ops and returns :ok when disabled" do
      assert Diagnostics.log_command("zpool", ["list"], {:ok, "body"}, 1_234) == :ok
    end

    test "log_sample/2 no-ops and returns :ok when disabled" do
      assert Diagnostics.log_sample("fast", %{foo: "bar"}) == :ok
    end

    test "log_command/4 no-ops when enabled but writer is not running" do
      dump_path = scratch_path("diag")
      :ok = Diagnostics.configure(dump_path)

      # This test never starts the writer; logging must still be harmless.
      assert Diagnostics.log_command("zpool", ["list"], {:ok, "body"}, 1_234) == :ok
    end
  end

  # Removes the :dump_dir key so no configuration leaks between tests.
  defp reset_dump_dir, do: Application.delete_env(:agent, :dump_dir)

  # Builds a unique path under the system temp dir and schedules its removal
  # when the calling test exits. Nothing is created on disk here.
  defp scratch_path(prefix) do
    path = Path.join(System.tmp_dir!(), "#{prefix}-#{System.unique_integer([:positive])}")
    on_exit(fn -> File.rm_rf(path) end)
    path
  end
end