- README.md: full project overview with setup, training, API, and RSpamd integration docs - server.py: add reason (human-readable explanation) and quote (suspicious snippet) to response - spamllm.lua: pass reason and quote through to RSpamd symbol description for logs/UI Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
134 lines
3.8 KiB
Lua
134 lines
3.8 KiB
Lua
-- RSpamd Lua Plugin für SpamLLM
|
|
-- Kopiere nach /etc/rspamd/plugins.d/spamllm.lua
|
|
--
|
|
-- Dieses Plugin sendet Mail-Daten an den SpamLLM-HTTP-Service
-- und setzt den Score basierend auf der Antwort.
|
|
|
|
local rspamd_http = require "rspamd_http"
|
|
local rspamd_logger = require "rspamd_logger"
|
|
local ucl = require "ucl"
|
|
|
|
local N = "spamllm"
|
|
|
|
-- Plugin configuration. All values are read by the code in this file at
-- scan time; nothing here is loaded from the Rspamd configuration.
local settings = {
    -- Master switch for the plugin.
    enabled = true,

    -- Classifier service endpoint and per-request timeout (seconds).
    url = "http://127.0.0.1:8000/classify",
    timeout = 5.0,

    -- Maximum number of body bytes forwarded to the classifier.
    max_body_length = 4096,

    -- Decision threshold.
    -- NOTE(review): not referenced by the visible code; confirm intended use.
    threshold = 0.5,

    -- Names of the symbols inserted into the scan result.
    symbol_spam = "SPAMLLM_SPAM",
    symbol_ham = "SPAMLLM_HAM",
    symbol_foreign = "SPAMLLM_FOREIGN_LANG",
}
|
|
|
|
--- Concatenate the content of all text parts, capped at
-- `settings.max_body_length` bytes.
-- @param task Rspamd task object
-- @treturn string the (possibly empty) body text
local function collect_body(task)
    local text_parts = task:get_text_parts()
    if not text_parts then
        return ""
    end

    local limit = settings.max_body_length
    local chunks, total = {}, 0
    for _, part in ipairs(text_parts) do
        local content = part:get_content()
        if content then
            local piece = tostring(content)
            chunks[#chunks + 1] = piece
            total = total + #piece
            -- Stop reading further parts once the cap is exceeded.
            if total > limit then
                break
            end
        end
    end

    local body = table.concat(chunks)
    if #body > limit then
        -- NOTE(review): byte-based cut; may split a multi-byte UTF-8 sequence.
        body = body:sub(1, limit)
    end
    return body
end

--- Format a value as a number for logging, or "?" when it is not numeric.
-- @param value any value from the service response
-- @tparam string fmt a `string.format` number format such as "%.2f"
-- @treturn string
local function format_number(value, fmt)
    local number = tonumber(value)
    if number then
        return string.format(fmt, number)
    end
    return "?"
end

--- Symbol callback: send sender, subject and body to the SpamLLM HTTP
-- service and insert spam/ham/foreign-language symbols from its answer.
-- Failures (transport error, non-200 status, malformed response) are
-- logged and leave the task without any SpamLLM symbol.
-- @param task Rspamd task object
local function check_spamllm(task)
    -- Honour the master switch declared in `settings`.
    if not settings.enabled then
        return
    end

    -- Extract mail data
    local from = task:get_from("smtp")
    local from_addr = ""
    if from and from[1] then
        from_addr = from[1].addr or ""
    end

    local subject = task:get_subject() or ""
    local body = collect_body(task)

    -- Serialise with UCL so that quotes, backslashes and control characters
    -- are escaped correctly instead of hand-building the JSON string.
    local request_body = ucl.to_format({
        from_addr = from_addr,
        subject = subject,
        body = body,
    }, "json-compact")

    local function callback(err, code, response_body)
        if err then
            rspamd_logger.errx(task, "SpamLLM request failed: %s", err)
            return
        end

        if code ~= 200 then
            rspamd_logger.errx(task, "SpamLLM returned HTTP %s", code)
            return
        end

        local parser = ucl.parser()
        local ok, parse_err = parser:parse_string(response_body)
        if not ok then
            rspamd_logger.errx(task, "SpamLLM JSON parse error: %s", parse_err)
            return
        end

        local result = parser:get_object()
        if type(result) ~= "table" then
            rspamd_logger.errx(task, "SpamLLM returned an unexpected JSON payload")
            return
        end

        -- The confidence is used as symbol weight; refuse to guess one.
        local confidence = tonumber(result.confidence)
        if not confidence then
            rspamd_logger.errx(task, "SpamLLM response has no numeric confidence")
            return
        end

        if result.is_spam then
            -- Attach reason and quote to the symbol as its option string
            local description = "SpamLLM"
            if result.reason then
                description = tostring(result.reason)
            end
            if type(result.quote) == "string" and #result.quote > 0 then
                description = description .. " | " .. result.quote
            end
            task:insert_result(settings.symbol_spam, confidence, description)
            -- The Rspamd logger only expands %s / %<n>, so numbers are
            -- formatted up front with string.format.
            rspamd_logger.infox(task,
                "SpamLLM: SPAM (confidence=%s, score=%s, lang=%s, reason=%s)",
                format_number(confidence, "%.2f"),
                format_number(result.score, "%.2f"),
                result.language or "?",
                result.reason or "?")
        else
            task:insert_result(settings.symbol_ham, -confidence, "SpamLLM")
        end

        -- Foreign-language bonus as a separate symbol
        local bonus = tonumber(result.foreign_lang_bonus)
        if bonus and bonus > 0 then
            local language = tostring(result.language or "unknown")
            task:insert_result(settings.symbol_foreign, bonus / 4.0,
                string.format("lang=%s", language))
            rspamd_logger.infox(task,
                "SpamLLM: Foreign language detected: %s (bonus=%s)",
                language, format_number(bonus, "%.1f"))
        end
    end

    rspamd_http.request({
        task = task,
        url = settings.url,
        body = request_body,
        callback = callback,
        headers = {
            ["Content-Type"] = "application/json",
        },
        timeout = settings.timeout,
    })
end
|
|
|
|
-- Register symbols.
-- The spam symbol carries the callback; the ham and foreign-language symbols
-- are virtual children of it and are inserted from that same callback.
-- The parent id is taken from the value returned by register_symbol rather
-- than looked up again by name.
local spamllm_parent_id = rspamd_config:register_symbol({
    name = settings.symbol_spam,
    weight = 5.0,
    callback = check_spamllm,
    type = "normal",
    description = "SpamLLM DistilBERT spam classifier",
})

rspamd_config:register_symbol({
    name = settings.symbol_ham,
    weight = -2.0,
    type = "virtual",
    parent = spamllm_parent_id,
    description = "SpamLLM DistilBERT ham classification",
})

rspamd_config:register_symbol({
    name = settings.symbol_foreign,
    weight = 4.0,
    type = "virtual",
    parent = spamllm_parent_id,
    description = "Mail in unerwarteter Sprache (nicht DE/EN)",
})
|