spamBERT/rspamd/lua/spamllm.lua
Carsten Abele f05320a8cb Add project README and reason/quote fields to classifier response
- README.md: full project overview with setup, training, API, and RSpamd integration docs
- server.py: add reason (human-readable explanation) and quote (suspicious snippet) to response
- spamllm.lua: pass reason and quote through to RSpamd symbol description for logs/UI

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 22:32:54 +01:00

134 lines
3.8 KiB
Lua

-- Rspamd Lua plugin for SpamLLM
-- Copy to /etc/rspamd/plugins.d/spamllm.lua
--
-- This plugin sends mail data to the SpamLLM HTTP service
-- and sets the score based on the response.
local rspamd_http = require "rspamd_http"
local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"
local N = "spamllm"
-- Built-in plugin configuration. Every value here is a hard-coded default.
local settings = {
  -- Master switch and decision threshold. Neither is read by the code
  -- visible in this file.
  enabled = true,
  threshold = 0.5,

  -- Classifier endpoint and the timeout value handed to rspamd_http.request
  -- (presumably seconds -- confirm against the Rspamd HTTP API).
  url = "http://127.0.0.1:8000/classify",
  timeout = 5.0,

  -- Upper bound, in bytes, on the concatenated text body that is sent.
  max_body_length = 4096,

  -- Names of the symbols inserted into the task result.
  symbol_spam = "SPAMLLM_SPAM",
  symbol_ham = "SPAMLLM_HAM",
  symbol_foreign = "SPAMLLM_FOREIGN_LANG",
}
-- Joins the content of all text parts of `task` into a single string that is
-- at most settings.max_body_length bytes long.
-- Collection stops as soon as the limit has been exceeded; the cut is
-- byte-based, so a multi-byte character at the boundary may be split.
local function spamllm_collect_body(task)
  local text_parts = task:get_text_parts()
  if not text_parts then
    return ""
  end

  local limit = settings.max_body_length
  local chunks, total = {}, 0
  for _, part in ipairs(text_parts) do
    local content = part:get_content()
    if content then
      local text = tostring(content)
      chunks[#chunks + 1] = text
      total = total + #text
      if total > limit then
        break
      end
    end
  end

  -- Buffer + table.concat avoids re-copying the growing string per part.
  local body = table.concat(chunks)
  if #body > limit then
    body = body:sub(1, limit)
  end
  return body
end

-- Renders `value` with the string.format spec `fmt`, or "?" when it is not
-- a number. Used so that log calls only need plain %s placeholders.
local function spamllm_format_number(value, fmt)
  local number = tonumber(value)
  if number then
    return string.format(fmt, number)
  end
  return "?"
end

-- Symbol callback: sends sender address, subject and (truncated) text body
-- of `task` to the SpamLLM HTTP service and inserts result symbols based on
-- the JSON answer.
--
-- Response fields read: is_spam, confidence, score, reason, quote, language,
-- foreign_lang_bonus. A failed request, a non-200 status, unparsable JSON or
-- a response without a numeric `confidence` is logged and inserts nothing.
--
-- @param task the Rspamd task being scanned
local function check_spamllm(task)
  -- `enabled` defaults to true, so this only matters once it is switched off.
  if not settings.enabled then
    return
  end

  -- Collect the mail data that is sent to the classifier.
  local from = task:get_from("smtp")
  local from_addr = ""
  if from and from[1] then
    from_addr = from[1].addr or ""
  end
  local subject = task:get_subject() or ""
  local body = spamllm_collect_body(task)

  -- Let UCL serialise the payload: mail content is untrusted and may contain
  -- backslashes, quotes, tabs or other control characters that hand-written
  -- escaping would turn into invalid (or injectable) JSON.
  local request_body = ucl.to_format({
    from_addr = from_addr,
    subject = subject,
    body = body,
  }, "json-compact")

  local function callback(err, code, response_body)
    if err then
      rspamd_logger.errx(task, "SpamLLM request failed: %s", err)
      return
    end
    if code ~= 200 then
      rspamd_logger.errx(task, "SpamLLM returned HTTP %s", code)
      return
    end

    local parser = ucl.parser()
    local ok, parse_err = parser:parse_string(response_body)
    if not ok then
      rspamd_logger.errx(task, "SpamLLM JSON parse error: %s", parse_err)
      return
    end

    local result = parser:get_object()
    if type(result) ~= "table" then
      rspamd_logger.errx(task, "SpamLLM response is not a JSON object")
      return
    end

    -- The confidence is used as the symbol weight; without a number there is
    -- nothing meaningful to insert.
    local confidence = tonumber(result.confidence)
    if not confidence then
      rspamd_logger.errx(task, "SpamLLM response has no numeric confidence")
      return
    end

    local language = nil
    if type(result.language) == "string" then
      language = result.language
    end

    if result.is_spam then
      -- Attach reason and quote to the symbol as its option string.
      local description = "SpamLLM"
      if type(result.reason) == "string" then
        description = result.reason
      end
      if type(result.quote) == "string" and #result.quote > 0 then
        description = description .. " | " .. result.quote
      end
      task:insert_result(settings.symbol_spam, confidence, description)
      -- Numbers are pre-formatted: the logger is only given %s placeholders.
      rspamd_logger.infox(task,
        "SpamLLM: SPAM (confidence=%s, score=%s, lang=%s, reason=%s)",
        spamllm_format_number(confidence, "%.2f"),
        spamllm_format_number(result.score, "%.2f"),
        language or "?",
        description)
    else
      task:insert_result(settings.symbol_ham, -confidence, "SpamLLM")
    end

    -- Foreign-language bonus is reported as a separate symbol.
    local bonus = tonumber(result.foreign_lang_bonus)
    if bonus and bonus > 0 then
      task:insert_result(settings.symbol_foreign, bonus / 4.0,
        string.format("lang=%s", language or "unknown"))
      rspamd_logger.infox(task,
        "SpamLLM: Foreign language detected: %s (bonus=%s)",
        language or "unknown",
        spamllm_format_number(bonus, "%.1f"))
    end
  end

  rspamd_http.request({
    task = task,
    url = settings.url,
    -- Stated explicitly because a JSON body is being sent.
    method = "POST",
    body = request_body,
    callback = callback,
    headers = {
      ["Content-Type"] = "application/json",
    },
    timeout = settings.timeout,
  })
end
-- Register the symbols.
-- The spam symbol is the only one with a callback (check_spamllm); the ham
-- and foreign-language symbols are virtual children of it and are inserted
-- from that same callback.
rspamd_config:register_symbol({
  type = "normal",
  name = settings.symbol_spam,
  callback = check_spamllm,
  weight = 5.0,
  description = "SpamLLM DistilBERT spam classifier",
})

-- Look the parent id up once and reuse it for every virtual symbol.
local spamllm_parent_id = rspamd_config:get_symbol_id(settings.symbol_spam)

-- Virtual symbols, registered in this order.
local spamllm_virtual_symbols = {
  {
    name = settings.symbol_ham,
    weight = -2.0,
    description = "SpamLLM DistilBERT ham classification",
  },
  {
    name = settings.symbol_foreign,
    weight = 4.0,
    description = "Mail in unerwarteter Sprache (nicht DE/EN)",
  },
}

for _, def in ipairs(spamllm_virtual_symbols) do
  rspamd_config:register_symbol({
    name = def.name,
    weight = def.weight,
    type = "virtual",
    parent = spamllm_parent_id,
    description = def.description,
  })
end