Initial commit: SpamLLM - DistilBERT spam classifier for RSpamd
Multilingual spam classifier (DE/EN) with language detection. Non-DE/EN mails receive an additional spam score bonus. - train.py: Fine-tune distilbert-base-multilingual-cased on spam/ham data - server.py: FastAPI service with langdetect integration - rspamd/: Lua plugin and config for RSpamd integration - export_rspamd_data.py: Export Maildir folders to CSV training data - test_classify.py: Local model validation with DE/EN/foreign test cases Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
38efd20b4d
7 changed files with 671 additions and 0 deletions
26
rspamd/local.d/external_services.conf
Normal file
26
rspamd/local.d/external_services.conf
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# rspamd external-service configuration for SpamLLM
|
||||
# Copy this file to /etc/rspamd/local.d/external_services.conf
|
||||
|
||||
spamllm {
|
||||
# Type: HTTP-based external service
# NOTE(review): confirm that "http" is a scanner type the consuming module
# actually recognises; the code that reads this section is not visible here.
|
||||
type = "http";
|
||||
|
||||
# URL of the SpamLLM FastAPI service (loopback address, port 8000)
|
||||
url = "http://127.0.0.1:8000/classify";
|
||||
|
||||
# Timeout in seconds
|
||||
timeout = 5.0;
|
||||
|
||||
# Maximum message size that is sent to the service (in bytes)
# NOTE(review): presumably 50,000 bytes under UCL's decimal `k` suffix
# (`kb` would be the 1024-based form) - confirm.
|
||||
max_size = 50k;
|
||||
|
||||
# Symbol that is inserted when spam is detected
|
||||
symbol = "SPAMLLM_SPAM";
|
||||
|
||||
# Score assigned to the symbol (set dynamically by the service)
|
||||
weight = 5.0;
|
||||
|
||||
# Only check mails in the grey zone (score strictly between 3 and 12)
|
||||
# This saves resources: obvious spam/ham is not sent to the LLM
# NOTE(review): verify this expression against the rspamd Lua API before
# relying on it - `rspamd_config.is_local` may not exist (locality is
# presumably a method on the IP object), and `task:get_metric_score` may
# return a table rather than a number, which would break the comparisons.
# Also confirm that the consuming module evaluates a `condition` string.
|
||||
condition = "not rspamd_config.is_local(task:get_from_ip()) and task:get_metric_score('default') > 3 and task:get_metric_score('default') < 12";
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue