From 8c3e953e4e5cda6fbf908f3c4810e93d09dd2fc5 Mon Sep 17 00:00:00 2001
From: Carsten
Date: Tue, 21 Apr 2026 22:32:36 +0200
Subject: [PATCH] feat(agent): zfs collector for pools + datasets/snapshots with fixture tests

---
 agent/lib/proxmox_agent/collectors/zfs.ex    | 152 +++++++++++++++++++
 agent/test/fixtures/zfs/zfs_list.json        |  37 +++++
 agent/test/fixtures/zfs/zpool_list.json      |  23 +++
 agent/test/fixtures/zfs/zpool_status.json    |  45 ++++++
 .../proxmox_agent/collectors/zfs_test.exs    |  62 ++++++++
 5 files changed, 319 insertions(+)
 create mode 100644 agent/lib/proxmox_agent/collectors/zfs.ex
 create mode 100644 agent/test/fixtures/zfs/zfs_list.json
 create mode 100644 agent/test/fixtures/zfs/zpool_list.json
 create mode 100644 agent/test/fixtures/zfs/zpool_status.json
 create mode 100644 agent/test/proxmox_agent/collectors/zfs_test.exs

diff --git a/agent/lib/proxmox_agent/collectors/zfs.ex b/agent/lib/proxmox_agent/collectors/zfs.ex
new file mode 100644
index 0000000..933ba70
--- /dev/null
+++ b/agent/lib/proxmox_agent/collectors/zfs.ex
@@ -0,0 +1,152 @@
+defmodule ProxmoxAgent.Collectors.Zfs do
+  @moduledoc """
+  Collects ZFS pool health (fast path) and dataset/snapshot info (medium path).
+  Shelling out is delegated to an injectable runner so tests can supply fixtures.
+  Requires OpenZFS 2.3+ (JSON command output via `-j`/`--json-int`).
+  """
+
+  @type pool_summary :: %{
+          name: String.t(),
+          health: String.t(),
+          size_bytes: non_neg_integer(),
+          allocated_bytes: non_neg_integer(),
+          free_bytes: non_neg_integer(),
+          fragmentation_percent: non_neg_integer(),
+          capacity_percent: non_neg_integer(),
+          error_count: non_neg_integer(),
+          vdev_count: non_neg_integer(),
+          degraded_vdev_count: non_neg_integer(),
+          last_scrub_end: String.t() | nil
+        }
+
+  @spec collect_pools(keyword()) :: %{pools: [pool_summary()], errors: [map()]}
+  def collect_pools(opts \\ []) do
+    runner = runner(opts)
+
+    {list_result, list_err} = decode(runner.("zpool", ["list", "-j", "--json-int"]), :zpool_list)
+
+    {status_result, status_err} =
+      decode(runner.("zpool", ["status", "-j", "--json-flat-vdevs", "--json-int"]), :zpool_status)
+
+    pools = merge_pools(list_result, status_result)
+    errors = Enum.filter([list_err, status_err], & &1)
+    %{pools: pools, errors: errors}
+  end
+
+  @type dataset_summary :: %{
+          name: String.t(),
+          used_bytes: non_neg_integer(),
+          usedbysnapshots_bytes: non_neg_integer(),
+          snapshot_count: non_neg_integer(),
+          newest_snapshot_unix: non_neg_integer() | nil,
+          oldest_snapshot_unix: non_neg_integer() | nil
+        }
+
+  @spec collect_datasets(keyword()) :: %{datasets: [dataset_summary()], errors: [map()]}
+  def collect_datasets(opts \\ []) do
+    runner = runner(opts)
+
+    {list_result, err} =
+      decode(runner.("zfs", ["list", "-j", "--json-int", "-t", "all"]), :zfs_list)
+
+    datasets = summarize_datasets(list_result)
+    errors = if err, do: [err], else: []
+    %{datasets: datasets, errors: errors}
+  end
+
+  defp runner(opts), do: Keyword.get(opts, :runner, &ProxmoxAgent.Shell.run/2)
+
+  defp decode({:ok, body}, tag) do
+    case Jason.decode(body) do
+      {:ok, map} -> {map, nil}
+      {:error, e} ->
+        {nil, %{tag: Atom.to_string(tag), message: "JSON decode failed: " <> Exception.message(e)}}
+    end
+  end
+
+  defp decode({:error, reason}, tag),
+    do: {nil, %{tag: Atom.to_string(tag), message: inspect(reason)}}
+
+  # A pool summary needs both `zpool list` (sizes) and `zpool status` (vdevs,
+  # errors, scrub); if either command failed, report no pools and let the
+  # caller inspect `errors` instead.
+  defp merge_pools(nil, _), do: []
+  defp merge_pools(_, nil), do: []
+
+  defp merge_pools(%{"pools" => list_pools}, %{"pools" => status_pools}) do
+    Enum.map(list_pools, fn {name, list_info} ->
+      status_info = Map.get(status_pools, name, %{})
+      vdevs = status_info |> Map.get("vdevs", %{}) |> Map.values()
+
+      %{
+        name: name,
+        health: Map.get(list_info, "health"),
+        size_bytes: Map.get(list_info, "size", 0),
+        allocated_bytes: Map.get(list_info, "alloc", 0),
+        free_bytes: Map.get(list_info, "free", 0),
+        fragmentation_percent: Map.get(list_info, "frag", 0),
+        capacity_percent: Map.get(list_info, "cap", 0),
+        error_count: to_int(Map.get(status_info, "error_count", "0")),
+        vdev_count: length(vdevs),
+        degraded_vdev_count: Enum.count(vdevs, &(&1["state"] != "ONLINE")),
+        last_scrub_end: get_in(status_info, ["scan", "end_time"])
+      }
+    end)
+  end
+
+  defp summarize_datasets(nil), do: []
+
+  defp summarize_datasets(%{"datasets" => datasets}) do
+    by_type = Enum.group_by(datasets, fn {_, d} -> d["type"] end)
+
+    filesystems = Map.get(by_type, "FILESYSTEM", [])
+    snapshots_by_ds = group_snapshots(Map.get(by_type, "SNAPSHOT", []))
+
+    Enum.map(filesystems, fn {_name, ds} ->
+      name = ds["name"]
+      snaps = Map.get(snapshots_by_ds, name, [])
+
+      %{
+        name: name,
+        used_bytes: get_prop_int(ds, "used"),
+        usedbysnapshots_bytes: get_prop_int(ds, "usedbysnapshots"),
+        snapshot_count: length(snaps),
+        newest_snapshot_unix: snaps |> Enum.map(& &1.creation) |> max_or_nil(),
+        oldest_snapshot_unix: snaps |> Enum.map(& &1.creation) |> min_or_nil()
+      }
+    end)
+  end
+
+  # Index snapshots by parent dataset ("pool/ds@snap" -> "pool/ds").
+  defp group_snapshots(snapshots) do
+    Enum.reduce(snapshots, %{}, fn {_, snap}, acc ->
+      [parent | _] = String.split(snap["name"], "@", parts: 2)
+      creation = get_prop_int(snap, "creation")
+      entry = %{name: snap["name"], creation: creation}
+      Map.update(acc, parent, [entry], &[entry | &1])
+    end)
+  end
+
+  defp get_prop_int(ds, key) do
+    case get_in(ds, ["properties", key, "value"]) do
+      nil -> 0
+      v -> to_int(v)
+    end
+  end
+
+  defp to_int(v) when is_integer(v), do: v
+
+  # Parse defensively: some fields (e.g. `error_count`) arrive as strings even
+  # with `--json-int`, and unparsable input should not crash the collector.
+  defp to_int(v) when is_binary(v) do
+    case Integer.parse(v) do
+      {i, _} -> i
+      :error -> 0
+    end
+  end
+
+  defp max_or_nil([]), do: nil
+  defp max_or_nil(list), do: Enum.max(list)
+  defp min_or_nil([]), do: nil
+  defp min_or_nil(list), do: Enum.min(list)
+end
diff --git a/agent/test/fixtures/zfs/zfs_list.json b/agent/test/fixtures/zfs/zfs_list.json
new file mode 100644
index 0000000..f8e64e3
--- /dev/null
+++ b/agent/test/fixtures/zfs/zfs_list.json
@@ -0,0 +1,37 @@
+{
+  "output_version": { "command": "zfs list", "vers_major": 0, "vers_minor": 1 },
+  "datasets": {
+    "rpool": {
+      "name": "rpool",
+      "type": "FILESYSTEM",
+      "properties": {
+        "used": { "value": "200000000000" },
+        "available": { "value": "300000000000" },
+        "usedbysnapshots": { "value": "5000000000" }
+      }
+    },
+    "rpool/data": {
+      "name": "rpool/data",
+      "type": "FILESYSTEM",
+      "properties": {
+        "used": { "value": "100000000000" },
+        "available": { "value": "300000000000" },
+        "usedbysnapshots": { "value": "2000000000" }
+      }
+    },
+    "rpool/data@daily-2026-04-20": {
+      "name": "rpool/data@daily-2026-04-20",
+      "type": "SNAPSHOT",
+      "properties": {
+        "creation": { "value": "1776643200" }
+      }
+    },
+    "rpool/data@daily-2026-04-21": {
+      "name": "rpool/data@daily-2026-04-21",
+      "type": "SNAPSHOT",
+      "properties": {
+        "creation": { "value": "1776729600" }
+      }
+    }
+  }
+}
diff --git a/agent/test/fixtures/zfs/zpool_list.json b/agent/test/fixtures/zfs/zpool_list.json
new file mode 100644
index 0000000..f1f47bf
--- /dev/null
+++ b/agent/test/fixtures/zfs/zpool_list.json
@@ -0,0 +1,23 @@
+{
+  "output_version": { "command": "zpool list", "vers_major": 0, "vers_minor": 1 },
+  "pools": {
+    "rpool": {
+      "name": "rpool",
+      "size": 500000000000,
+      "alloc": 200000000000,
+      "free": 300000000000,
+      "frag": 17,
+      "cap": 40,
+      "health": "ONLINE"
+    },
+    "tank": {
+      "name": "tank",
+      "size": 8000000000000,
"alloc": 6000000000000, + "free": 2000000000000, + "frag": 55, + "cap": 75, + "health": "DEGRADED" + } + } +} diff --git a/agent/test/fixtures/zfs/zpool_status.json b/agent/test/fixtures/zfs/zpool_status.json new file mode 100644 index 0000000..b066e78 --- /dev/null +++ b/agent/test/fixtures/zfs/zpool_status.json @@ -0,0 +1,45 @@ +{ + "output_version": { "command": "zpool status", "vers_major": 0, "vers_minor": 1 }, + "pools": { + "rpool": { + "name": "rpool", + "state": "ONLINE", + "scan": { + "function": "scrub", + "state": "FINISHED", + "end_time": "Sat Apr 19 02:00:00 2026" + }, + "error_count": "0", + "vdevs": { + "mirror-0": { + "name": "mirror-0", + "vdev_type": "mirror", + "state": "ONLINE", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "0" + } + } + }, + "tank": { + "name": "tank", + "state": "DEGRADED", + "scan": { + "function": "scrub", + "state": "FINISHED", + "end_time": "Tue Mar 01 08:00:00 2026" + }, + "error_count": "2", + "vdevs": { + "raidz2-0": { + "name": "raidz2-0", + "vdev_type": "raidz2", + "state": "DEGRADED", + "read_errors": "0", + "write_errors": "0", + "checksum_errors": "2" + } + } + } + } +} diff --git a/agent/test/proxmox_agent/collectors/zfs_test.exs b/agent/test/proxmox_agent/collectors/zfs_test.exs new file mode 100644 index 0000000..e3220d1 --- /dev/null +++ b/agent/test/proxmox_agent/collectors/zfs_test.exs @@ -0,0 +1,62 @@ +defmodule ProxmoxAgent.Collectors.ZfsTest do + use ExUnit.Case, async: true + + alias ProxmoxAgent.Collectors.Zfs + + @fixtures Path.expand("../../fixtures/zfs", __DIR__) + + defp fake_runner do + fn + "zpool", ["list", "-j", "--json-int"] -> + {:ok, File.read!(Path.join(@fixtures, "zpool_list.json"))} + + "zpool", ["status", "-j", "--json-flat-vdevs", "--json-int"] -> + {:ok, File.read!(Path.join(@fixtures, "zpool_status.json"))} + + "zfs", ["list", "-j", "--json-int", "-t", "all"] -> + {:ok, File.read!(Path.join(@fixtures, "zfs_list.json"))} + end + end + + describe "collect_pools/1" do + test "returns a summary per pool" do + sample = Zfs.collect_pools(runner: fake_runner()) + assert is_list(sample.pools) + assert length(sample.pools) == 2 + rpool = Enum.find(sample.pools, &(&1.name == "rpool")) + tank = Enum.find(sample.pools, &(&1.name == "tank")) + + assert rpool.health == "ONLINE" + assert rpool.capacity_percent == 40 + assert rpool.fragmentation_percent == 17 + assert rpool.size_bytes == 500_000_000_000 + assert rpool.error_count == 0 + assert rpool.degraded_vdev_count == 0 + + assert tank.health == "DEGRADED" + assert tank.error_count == 2 + assert tank.degraded_vdev_count == 1 + end + + test "populates errors list when zpool fails" do + failing = fn _, _ -> {:error, {:enoent, "zpool"}} end + sample = Zfs.collect_pools(runner: failing) + assert sample.pools == [] + assert length(sample.errors) >= 1 + end + end + + describe "collect_datasets/1" do + test "returns datasets and per-dataset snapshot summary" do + sample = Zfs.collect_datasets(runner: fake_runner()) + assert length(sample.datasets) == 2 + + rpool_data = Enum.find(sample.datasets, &(&1.name == "rpool/data")) + assert rpool_data.used_bytes == 100_000_000_000 + assert rpool_data.usedbysnapshots_bytes == 2_000_000_000 + assert rpool_data.snapshot_count == 2 + assert rpool_data.newest_snapshot_unix == 1_745_193_600 + assert rpool_data.oldest_snapshot_unix == 1_745_107_200 + end + end +end