spamBERT/rspamd/lua/spamllm.lua

-- Rspamd Lua plugin for SpamLLM
-- Copy to /etc/rspamd/plugins.d/spamllm.lua
--
-- This plugin sends mail data to the SpamLLM HTTP service
-- and sets the score based on the response.

local rspamd_http = require "rspamd_http"
local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"

-- Short module name.
-- NOTE(review): not referenced anywhere in the visible part of this file.
local N = "spamllm"

-- Built-in defaults used by the classifier callback and the symbol
-- registration in this file.
local settings = {
  -- On/off flag for the plugin.
  enabled = true,

  -- Endpoint of the SpamLLM HTTP service and the `timeout` value handed to
  -- rspamd_http.request.
  url = "http://127.0.0.1:8000/classify",
  timeout = 5.0,

  -- Upper bound (in bytes) for the concatenated text parts sent as the body.
  max_body_length = 4096,

  -- NOTE(review): not read by the visible code; presumably a decision
  -- threshold applied elsewhere -- confirm.
  threshold = 0.5,

  -- Names of the symbols inserted into the task result.
  symbol_spam = "SPAMLLM_SPAM",
  symbol_ham = "SPAMLLM_HAM",
  symbol_foreign = "SPAMLLM_FOREIGN_LANG",
}

--- Cut `text` down to at most `max_len` bytes without splitting a UTF-8
-- multi-byte sequence at the cut point.
-- @tparam string text
-- @tparam number max_len maximum length in bytes
-- @treturn string `text` unchanged if it already fits, otherwise a prefix
local function truncate_utf8(text, max_len)
  if #text <= max_len then
    return text
  end

  local cut = max_len
  while cut > 0 do
    local next_byte = text:byte(cut + 1)
    -- 0x80..0xBF are UTF-8 continuation bytes: if the first dropped byte is
    -- one of them, the cut is inside a character, so back off by one byte.
    if next_byte < 0x80 or next_byte >= 0xC0 then
      break
    end
    cut = cut - 1
  end

  return text:sub(1, cut)
end

--- Render a value with a `string.format` numeric pattern for logging.
-- @param value anything; non-numeric values are rendered as "?"
-- @tparam string fmt a `string.format` pattern such as "%.2f"
-- @treturn string
local function format_number(value, fmt)
  local num = tonumber(value)
  if num then
    return string.format(fmt, num)
  end
  return "?"
end

--- Symbol callback: send sender, subject and (truncated) text body of the
-- message to the SpamLLM HTTP service and insert result symbols according
-- to the JSON answer.
--
-- The request is asynchronous; symbols are inserted from the HTTP callback.
-- Any transport, HTTP status, parse or schema problem is logged and leaves
-- the task without SpamLLM symbols.
-- @param task the rspamd task being scanned
local function check_spamllm(task)
  if not settings.enabled then
    return
  end

  -- Envelope sender
  local from_addr = ""
  local from = task:get_from("smtp")
  if from and from[1] then
    from_addr = from[1].addr or ""
  end

  local subject = task:get_subject() or ""

  -- Collect text parts until the size limit is exceeded, then join once
  -- instead of re-concatenating a growing string on every iteration.
  local chunks, total_len = {}, 0
  local text_parts = task:get_text_parts()
  if text_parts then
    for _, part in ipairs(text_parts) do
      local content = part:get_content()
      if content then
        local chunk = tostring(content)
        chunks[#chunks + 1] = chunk
        total_len = total_len + #chunk
        if total_len > settings.max_body_length then
          break
        end
      end
    end
  end
  local body = truncate_utf8(table.concat(chunks), settings.max_body_length)

  -- Let the UCL emitter do the JSON escaping: hand-written escaping of only
  -- `"`, `\n` and `\r` left backslashes and other control characters
  -- untouched, which produced invalid (and injectable) JSON.
  local request_body = ucl.to_format({
    from_addr = from_addr,
    subject = subject,
    body = body,
  }, "json-compact")

  local function callback(err, code, response_body)
    if err then
      rspamd_logger.errx(task, "SpamLLM request failed: %s", err)
      return
    end

    if code ~= 200 then
      rspamd_logger.errx(task, "SpamLLM returned HTTP %s", code)
      return
    end

    local parser = ucl.parser()
    local ok, parse_err = parser:parse_string(response_body)
    if not ok then
      rspamd_logger.errx(task, "SpamLLM JSON parse error: %s", parse_err)
      return
    end

    local result = parser:get_object()
    if type(result) ~= "table" then
      rspamd_logger.errx(task, "SpamLLM returned an unexpected response (not a JSON object)")
      return
    end

    -- The confidence is used as the symbol weight below; without a number
    -- the arithmetic / insert_result call would raise inside the callback.
    local confidence = tonumber(result.confidence)
    if not confidence then
      rspamd_logger.errx(task, "SpamLLM response has no numeric confidence")
      return
    end

    local language = type(result.language) == "string" and result.language or nil

    if result.is_spam then
      -- Attach reason and quote to the symbol as its option string
      local reason = type(result.reason) == "string" and result.reason or nil
      local description = reason or "SpamLLM"
      if type(result.quote) == "string" and #result.quote > 0 then
        description = description .. " | " .. result.quote
      end
      task:insert_result(settings.symbol_spam, confidence, description)
      -- Numbers are formatted up front and passed through %s so the output
      -- does not depend on which placeholders the rspamd logger understands.
      rspamd_logger.infox(task, "SpamLLM: SPAM (confidence=%s, score=%s, lang=%s, reason=%s)",
        format_number(confidence, "%.2f"), format_number(result.score, "%.2f"),
        language or "?", reason or "?")
    else
      task:insert_result(settings.symbol_ham, -confidence, "SpamLLM")
    end

    -- Foreign-language bonus is reported as a separate symbol
    local bonus = tonumber(result.foreign_lang_bonus)
    if bonus and bonus > 0 then
      local lang_label = language or "unknown"
      task:insert_result(settings.symbol_foreign, bonus / 4.0,
        string.format("lang=%s", lang_label))
      rspamd_logger.infox(task, "SpamLLM: Foreign language detected: %s (bonus=%s)",
        lang_label, format_number(bonus, "%.1f"))
    end
  end

  rspamd_http.request({
    task = task,
    url = settings.url,
    body = request_body,
    callback = callback,
    headers = {
      ["Content-Type"] = "application/json",
    },
    timeout = settings.timeout,
  })
end

-- Register the symbols. The spam symbol owns the callback; the ham and
-- foreign-language symbols are virtual children of it.
rspamd_config:register_symbol({
  name = settings.symbol_spam,
  weight = 5.0,
  callback = check_spamllm,
  type = "normal",
  description = "SpamLLM DistilBERT spam classifier",
})

-- Parent id shared by both virtual symbols (looked up once).
local spamllm_parent_id = rspamd_config:get_symbol_id(settings.symbol_spam)

local spamllm_virtual_symbols = {
  {
    name = settings.symbol_ham,
    weight = -2.0,
    description = "SpamLLM DistilBERT ham classification",
  },
  {
    name = settings.symbol_foreign,
    weight = 4.0,
    description = "Mail in unerwarteter Sprache (nicht DE/EN)",
  },
}

for _, def in ipairs(spamllm_virtual_symbols) do
  rspamd_config:register_symbol({
    name = def.name,
    weight = def.weight,
    type = "virtual",
    parent = spamllm_parent_id,
    description = def.description,
  })
end