Add project README and reason/quote fields to classifier response
- README.md: full project overview with setup, training, API, and RSpamd integration docs
- server.py: add reason (human-readable explanation) and quote (suspicious snippet) to response
- spamllm.lua: pass reason and quote through to RSpamd symbol description for logs/UI

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
38efd20b4d
commit
f05320a8cb
3 changed files with 203 additions and 3 deletions
66
server.py
66
server.py
|
|
@ -62,6 +62,8 @@ class ClassifyResponse(BaseModel):
|
|||
score: float # RSpamd-kompatibler Score (0-15)
|
||||
language: str # Erkannte Sprache
|
||||
foreign_lang_bonus: float # Zusätzlicher Score für Fremdsprache
|
||||
reason: str # Menschenlesbare Begründung
|
||||
quote: str # Verdächtigster Textausschnitt
|
||||
|
||||
|
||||
def detect_language(text: str) -> tuple[str, bool]:
|
||||
|
|
@ -79,6 +81,65 @@ def detect_language(text: str) -> tuple[str, bool]:
|
|||
return "unknown", False
|
||||
|
||||
|
||||
# Spam signal phrases used for quote extraction (DE + EN).
# Order matters: find_spam_quote() reports the first entry of this list that
# occurs anywhere in the mail, not the occurrence that appears earliest.
SPAM_PATTERNS = [
    "click here", "klicken sie", "jetzt bestellen", "order now",
    "act now", "sofort", "dringend", "urgent", "verify your",
    "bestätigen sie", "gewonnen", "you won", "congratulations",
    "herzlichen glückwunsch", "free", "gratis", "kostenlos",
    "100%", "guarantee", "garantie", "limited time", "nur heute",
    "unsubscribe", "abmelden", "no risk", "kein risiko",
    "bank details", "bankdaten", "password", "passwort",
    "account suspended", "konto gesperrt", "credit card", "kreditkarte",
    "viagra", "cialis", "pharmacy", "apotheke", "discount", "rabatt",
    "million", "prize", "preis", "winner", "gewinner",
]


def find_spam_quote(subject: str, body: str) -> str:
    """Return the most suspicious-looking excerpt of a mail.

    The subject and body are joined with a single space and searched
    case-insensitively for each entry of ``SPAM_PATTERNS`` in list order.
    For the first entry that occurs, the excerpt consists of up to 30
    characters before the match, the match itself and up to 60 characters
    after it, with ``"..."`` added on each side that was cut off.

    If no pattern occurs, the first sentence of the body (text before the
    first ``"."``) is returned, truncated to 120 characters plus ``"..."``.
    If that sentence is empty, the first 120 characters of the subject are
    returned instead; the result is ``""`` when both are empty.

    Args:
        subject: Subject line of the mail.
        body: Body text of the mail.

    Returns:
        The extracted excerpt, possibly an empty string.
    """
    import re  # local import so the function does not depend on file-level imports

    original = f"{subject} {body}"

    for pattern in SPAM_PATTERNS:
        # Match on the original text rather than on a lowercased copy:
        # str.lower() can change the length of a string (e.g. "İ" becomes two
        # code points), which would shift offsets relative to the original.
        match = re.search(re.escape(pattern), original, re.IGNORECASE)
        if match is None:
            continue

        # Context window around the match (at most 30 + match + 60 characters).
        start = max(0, match.start() - 30)
        end = min(len(original), match.end() + 60)
        snippet = original[start:end].strip()
        if start > 0:
            snippet = "..." + snippet
        if end < len(original):
            snippet = snippet + "..."
        return snippet

    # No pattern found -> fall back to the first sentence of the body.
    first_sentence = body.split(".")[0].strip() if body else ""
    if first_sentence:
        return first_sentence[:120] + ("..." if len(first_sentence) > 120 else "")

    # Body had nothing usable -> fall back to the subject.
    return subject[:120] if subject else ""
|
||||
|
||||
|
||||
def build_reason(spam_prob: float, is_foreign: bool, language: str) -> str:
    """Compose a short human-readable explanation for a classification.

    The first part describes the spam probability band (above 0.8, above
    0.5, above 0.3, or otherwise "likely ham"). When ``is_foreign`` is true,
    a second part naming the detected language is appended; parts are
    joined with ``"; "``.

    Args:
        spam_prob: Spam probability used to pick the confidence band.
        is_foreign: Whether the language note should be appended.
        language: Language label inserted into the language note.

    Returns:
        The explanation string.
    """
    # Pick the confidence band, checking the highest threshold first.
    if spam_prob > 0.8:
        verdict = f"High spam confidence ({spam_prob:.0%})"
    elif spam_prob > 0.5:
        verdict = f"Moderate spam confidence ({spam_prob:.0%})"
    elif spam_prob > 0.3:
        verdict = f"Low spam confidence ({spam_prob:.0%})"
    else:
        verdict = f"Likely ham ({1 - spam_prob:.0%} confidence)"

    if not is_foreign:
        return verdict

    language_note = f"Unexpected language: {language} (not DE/EN)"
    return "; ".join((verdict, language_note))
|
||||
|
||||
|
||||
@app.post("/classify", response_model=ClassifyResponse)
|
||||
async def classify(request: ClassifyRequest):
|
||||
# Kombiniere Mail-Felder zu einem Text
|
||||
|
|
@ -106,12 +167,17 @@ async def classify(request: ClassifyRequest):
|
|||
# Spam-Schwelle nach Bonus neu bewerten
|
||||
effective_spam = spam_prob > 0.5 or (is_foreign and spam_prob > 0.3)
|
||||
|
||||
reason = build_reason(spam_prob, is_foreign, language)
|
||||
quote = find_spam_quote(request.subject, request.body) if effective_spam else ""
|
||||
|
||||
return ClassifyResponse(
|
||||
is_spam=effective_spam,
|
||||
confidence=spam_prob,
|
||||
score=round(rspamd_score, 2),
|
||||
language=language,
|
||||
foreign_lang_bonus=lang_bonus,
|
||||
reason=reason,
|
||||
quote=quote,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue