feat(server): retention GenServer prunes samples older than 48h hourly

This commit is contained in:
Carsten 2026-04-21 22:29:24 +02:00
parent 751e035579
commit f09a77996b
3 changed files with 58 additions and 0 deletions

View file

@ -15,6 +15,7 @@ defmodule Server.Application do
skip: skip_migrations?()}, skip: skip_migrations?()},
{DNSCluster, query: Application.get_env(:server, :dns_cluster_query) || :ignore}, {DNSCluster, query: Application.get_env(:server, :dns_cluster_query) || :ignore},
{Phoenix.PubSub, name: Server.PubSub}, {Phoenix.PubSub, name: Server.PubSub},
Server.Retention,
# Start a worker by calling: Server.Worker.start_link(arg) # Start a worker by calling: Server.Worker.start_link(arg)
# {Server.Worker, arg}, # {Server.Worker, arg},
# Start to serve requests, typically the last entry # Start to serve requests, typically the last entry

View file

@ -0,0 +1,36 @@
defmodule Server.Retention do
  @moduledoc """
  Deletes metric samples older than the retention window. Runs hourly.

  The process keeps two settings in its state, both overridable through the
  keyword options given to `start_link/1`:

    * `:retention_seconds` - age beyond which samples are pruned
      (default: 48 hours)
    * `:interval_ms` - delay between prune runs (default: one hour)

  The first prune is scheduled one full interval after the process starts;
  nothing is deleted during `init/1`.
  """

  use GenServer

  require Logger

  @default_retention_seconds 48 * 60 * 60
  @default_interval_ms 60 * 60 * 1_000

  @doc """
  Starts the retention process, registered under the module name.

  `opts` is a keyword list; see the module documentation for the supported
  keys. Unknown keys are ignored.
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Synchronous prune used by tests and manual ops.

  Runs in the calling process (it does not go through the GenServer) and asks
  `Server.Metrics.delete_older_than/1` to remove everything older than
  `retention_seconds` before the current UTC time. The result of that call is
  returned unchanged; the periodic handler expects it to be a `{count, _}`
  tuple.
  """
  def prune_now(retention_seconds \\ @default_retention_seconds) do
    cutoff = DateTime.add(DateTime.utc_now(), -retention_seconds, :second)
    Server.Metrics.delete_older_than(cutoff)
  end

  @impl true
  def init(opts) do
    state = %{
      retention_seconds: Keyword.get(opts, :retention_seconds, @default_retention_seconds),
      interval_ms: Keyword.get(opts, :interval_ms, @default_interval_ms)
    }

    schedule_prune(state.interval_ms)
    {:ok, state}
  end

  @impl true
  def handle_info(:prune, state) do
    {count, _} = prune_now(state.retention_seconds)
    if count > 0, do: Logger.info("retention: pruned #{count} stale samples")

    # Re-arm only after this run has finished, so prune runs never overlap.
    schedule_prune(state.interval_ms)
    {:noreply, state}
  end

  # Defining handle_info/2 above replaces the catch-all that `use GenServer`
  # injects. Without this clause any stray message sent to this registered
  # process would raise FunctionClauseError and crash it.
  def handle_info(message, state) do
    Logger.warning("retention: ignoring unexpected message #{inspect(message)}")
    {:noreply, state}
  end

  # Sends `:prune` to this process after `interval_ms` milliseconds.
  defp schedule_prune(interval_ms) do
    Process.send_after(self(), :prune, interval_ms)
  end
end

View file

@ -0,0 +1,21 @@
defmodule Server.RetentionTest do
  use Server.DataCase, async: false

  alias Server.Hosts
  alias Server.Metrics
  alias Server.Retention

  # Retention window handed to prune_now/1, in seconds (48 hours).
  @window_seconds 48 * 3600

  test "prune_now/1 deletes samples older than the retention window" do
    {:ok, {host, _}} = Hosts.create_host("pve-01")

    # One sample an hour past the window, one only a minute old.
    stale_at = DateTime.utc_now() |> DateTime.add(-49 * 3600, :second)
    fresh_at = DateTime.utc_now() |> DateTime.add(-60, :second)

    {:ok, _} = Metrics.record_sample(host.id, "fast", stale_at, %{"x" => 1})
    {:ok, fresh} = Metrics.record_sample(host.id, "fast", fresh_at, %{"x" => 2})

    # Exactly the stale sample should be reported as deleted.
    {deleted, _} = Retention.prune_now(@window_seconds)
    assert deleted == 1

    # The only row left must be the fresh sample.
    assert [survivor] = Server.Repo.all(Server.Schema.Metric)
    assert survivor.id == fresh.id
  end
end