From f05320a8cb7955de337b3c9933928ac9b0a1ead2 Mon Sep 17 00:00:00 2001 From: Carsten Abele Date: Thu, 19 Mar 2026 22:32:54 +0100 Subject: [PATCH] Add project README and reason/quote fields to classifier response - README.md: full project overview with setup, training, API, and RSpamd integration docs - server.py: add reason (human-readable explanation) and quote (suspicious snippet) to response - spamllm.lua: pass reason and quote through to RSpamd symbol description for logs/UI Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 129 +++++++++++++++++++++++++++++++++++++++++ rspamd/lua/spamllm.lua | 11 +++- server.py | 66 +++++++++++++++++++++ 3 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..3ed9903 --- /dev/null +++ b/README.md @@ -0,0 +1,129 @@ +# SpamLLM + +A multilingual spam classifier for RSpamd using fine-tuned DistilBERT. Classifies emails in German and English, with automatic language detection that flags unexpected languages as suspicious. + +## How it works + +``` +Incoming Mail → RSpamd → HTTP POST → SpamLLM Service → Score + Reason + Quote → RSpamd +``` + +RSpamd sends mail metadata (subject, body, sender) to the SpamLLM FastAPI service. The service runs the text through a fine-tuned `distilbert-base-multilingual-cased` model and returns: + +- **score** (0-15, RSpamd-compatible) +- **reason** (human-readable explanation, e.g. 
"High spam confidence (94%)") +- **quote** (the most suspicious snippet from the mail) +- **language** detection with a score bonus for non-DE/EN mails + +## Project structure + +``` +spamllm/ +├── train.py # Fine-tune DistilBERT on spam/ham data +├── server.py # FastAPI classification service +├── test_classify.py # Local model validation (DE/EN/foreign samples) +├── export_rspamd_data.py # Export Maildir folders to CSV training data +├── requirements.txt # Python dependencies +└── rspamd/ + ├── local.d/ + │ └── external_services.conf # RSpamd config + └── lua/ + └── spamllm.lua # RSpamd Lua plugin +``` + +## Setup + +```bash +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +## Training + +### Option A: Demo dataset (quick start) + +Uses the public SMS Spam Collection (~5,500 English messages): + +```bash +python train.py --epochs 3 +``` + +### Option B: Your own mail data (recommended for production) + +1. Export your existing mails from Maildir: + +```bash +python export_rspamd_data.py \ + --spam-dir /var/vmail/user/.Junk/cur \ + --ham-dir /var/vmail/user/.INBOX/cur \ + --max-per-class 5000 +``` + +2. Train on the exported data: + +```bash +python train.py --custom-data --epochs 5 +``` + +You can also place a German-only dataset at `data/train_de.csv` (columns: `text`, `labels`) to supplement the English demo data when training without `--custom-data`. + +## Running the service + +```bash +uvicorn server:app --host 127.0.0.1 --port 8000 +``` + +### API + +**POST /classify** + +```json +{ + "subject": "Sie haben gewonnen!", + "body": "Klicken Sie hier um Ihren Preis abzuholen...", + "from_addr": "spam@example.com" +} +``` + +Response: + +```json +{ + "is_spam": true, + "confidence": 0.94, + "score": 14.1, + "language": "de", + "foreign_lang_bonus": 0.0, + "reason": "High spam confidence (94%)", + "quote": "...Klicken Sie hier um Ihren Preis abzuholen. Senden Sie uns Ihre Bankdaten..." 
+} +``` + +**GET /health** — returns `{"status": "ok", "model_loaded": true}` + +## RSpamd integration + +1. Copy `rspamd/lua/spamllm.lua` to `/etc/rspamd/plugins.d/` +2. Copy `rspamd/local.d/external_services.conf` to `/etc/rspamd/local.d/` +3. Reload RSpamd: `systemctl reload rspamd` + +The plugin registers three symbols: + +| Symbol | Weight | Description | +|--------|--------|-------------| +| `SPAMLLM_SPAM` | +5.0 | Spam detected by classifier | +| `SPAMLLM_HAM` | -2.0 | Ham detected by classifier | +| `SPAMLLM_FOREIGN_LANG` | +4.0 | Unexpected language (not DE/EN) | + +The Lua plugin only sends mails in the RSpamd grey zone (score 3-12) to the service, so obvious spam/ham is handled by RSpamd's built-in rules without extra latency. + +## Language detection + +Mails are classified by language using `langdetect`. Expected languages (German, English) are scored normally. All other languages receive a +4 point spam bonus and a lowered spam threshold (0.3 instead of 0.5), since non-DE/EN mails are almost always spam in this environment. + +## Performance + +- Inference: ~20-50ms per mail on CPU +- Model size: ~500MB (distilbert-base-multilingual-cased) +- Training on demo dataset: ~10-15 min on CPU, ~2 min on GPU diff --git a/rspamd/lua/spamllm.lua b/rspamd/lua/spamllm.lua index c8313bd..4164b88 100644 --- a/rspamd/lua/spamllm.lua +++ b/rspamd/lua/spamllm.lua @@ -75,9 +75,14 @@ local function check_spamllm(task) local result = parser:get_object() if result.is_spam then - task:insert_result(settings.symbol_spam, result.confidence, "SpamLLM") - rspamd_logger.infox(task, "SpamLLM: SPAM (confidence=%.2f, score=%.2f, lang=%s)", - result.confidence, result.score, result.language or "?") + -- Reason und Quote als Options an das Symbol anhängen + local description = result.reason or "SpamLLM" + if result.quote and #result.quote > 0 then + description = description .. " | " .. 
result.quote
+        end
+        task:insert_result(settings.symbol_spam, result.confidence, description)
+        rspamd_logger.infox(task, "SpamLLM: SPAM (confidence=%.2f, score=%.2f, lang=%s, reason=%s)",
+            result.confidence, result.score, result.language or "?", result.reason or "?")
     else
         task:insert_result(settings.symbol_ham, -result.confidence, "SpamLLM")
     end
diff --git a/server.py b/server.py
index 4525a43..d234d97 100644
--- a/server.py
+++ b/server.py
@@ -62,6 +62,8 @@ class ClassifyResponse(BaseModel):
     score: float  # RSpamd-kompatibler Score (0-15)
     language: str  # Erkannte Sprache
     foreign_lang_bonus: float  # Zusätzlicher Score für Fremdsprache
+    reason: str  # Human-readable explanation of the verdict
+    quote: str  # Most suspicious text snippet (empty when not spam)
 
 
 def detect_language(text: str) -> tuple[str, bool]:
@@ -79,6 +81,65 @@ def detect_language(text: str) -> tuple[str, bool]:
         return "unknown", False
 
 
+# Spam signal phrases for quote extraction (DE + EN); all lowercase, tried in list order
+SPAM_PATTERNS = [
+    "click here", "klicken sie", "jetzt bestellen", "order now",
+    "act now", "sofort", "dringend", "urgent", "verify your",
+    "bestätigen sie", "gewonnen", "you won", "congratulations",
+    "herzlichen glückwunsch", "free", "gratis", "kostenlos",
+    "100%", "guarantee", "garantie", "limited time", "nur heute",
+    "unsubscribe", "abmelden", "no risk", "kein risiko",
+    "bank details", "bankdaten", "password", "passwort",
+    "account suspended", "konto gesperrt", "credit card", "kreditkarte",
+    "viagra", "cialis", "pharmacy", "apotheke", "discount", "rabatt",
+    "million", "prize", "preis", "winner", "gewinner",
+]
+
+
+def find_spam_quote(subject: str, body: str) -> str:
+    """Return a snippet around the first matching spam pattern, else a short excerpt of the mail."""
+    original = f"{subject} {body}"
+    # str.lower() turns "İ" (U+0130) into two code points; map it first so offsets stay valid in `original`
+    full_text = original.replace("\u0130", "i").lower()
+    for pattern in SPAM_PATTERNS:
+        pos = full_text.find(pattern)
+        if pos != -1:
+            # Keep up to 30 characters before and 60 after the match as context
+            start = max(0, pos - 30)
+            end = min(len(original), pos + len(pattern) + 60)
+            snippet = original[start:end].strip()
+            if start > 0:
+                snippet = "..." + snippet
+            if end < len(original):
+                snippet = snippet + "..."
+            return snippet
+
+    # No pattern matched -> fall back to the first sentence of the body (max 120 characters)
+    if body:
+        first_sentence = body.split(".")[0].strip()
+        return first_sentence[:120] + ("..." if len(first_sentence) > 120 else "")
+    return subject[:120] if subject else ""
+
+
+def build_reason(spam_prob: float, is_foreign: bool, language: str) -> str:
+    """Build a human-readable explanation from the spam probability and the language check."""
+    reasons = []
+
+    if spam_prob > 0.8:
+        reasons.append(f"High spam confidence ({spam_prob:.0%})")
+    elif spam_prob > 0.5:
+        reasons.append(f"Moderate spam confidence ({spam_prob:.0%})")
+    elif spam_prob > 0.3:
+        reasons.append(f"Low spam confidence ({spam_prob:.0%})")
+    else:
+        reasons.append(f"Likely ham ({1 - spam_prob:.0%} confidence)")
+
+    if is_foreign:
+        reasons.append(f"Unexpected language: {language} (not DE/EN)")
+
+    return "; ".join(reasons)
+
+
 @app.post("/classify", response_model=ClassifyResponse)
 async def classify(request: ClassifyRequest):
     # Kombiniere Mail-Felder zu einem Text
@@ -106,12 +167,17 @@ async def classify(request: ClassifyRequest):
     # Spam-Schwelle nach Bonus neu bewerten
     effective_spam = spam_prob > 0.5 or (is_foreign and spam_prob > 0.3)
 
+    reason = build_reason(spam_prob, is_foreign, language)
+    quote = find_spam_quote(request.subject, request.body) if effective_spam else ""
+
     return ClassifyResponse(
         is_spam=effective_spam,
         confidence=spam_prob,
         score=round(rspamd_score, 2),
         language=language,
         foreign_lang_bonus=lang_bonus,
+        reason=reason,
+        quote=quote,
     )
 
 