Moderators: ScalixSupport, admin
Code: Select all
#!/usr/bin/perl
use strict;
use warnings;
use Mail::IMAPClient;
# Export every message of one IMAP folder into a local file, then delete the
# messages flagged as seen from that folder on the server and expunge it.
#
# Arguments (all five are required, in this order):
#   1. path of the local mbox file to write (it is overwritten)
#   2. IMAP host
#   3. IMAP user
#   4. name of the IMAP folder to export
#   5. IMAP password
my $usage =
"ARGS must be :
\targv1 : mbox file
\targv2 : imap host
\targv3 : imap user
\targv4 : mailbox on imap server to export
\targv5 : password\n";
die($usage) if(@ARGV != 5);
my ($file,$host,$user,$dest,$password) = @ARGV;

my $imap = Mail::IMAPClient->new(
    'Server'   => $host,
    'User'     => $user,
    'Password' => $password,
) or die "Unable to connect to imap server: $@\n";

my $found = 0;
foreach my $folder ($imap->folders) {
    # Only the requested folder is ever selected; other folders (some of
    # which may not be selectable at all) must not abort the export.
    next unless $folder eq $dest;
    $found = 1;

    $imap->select($folder) or die "Unable to select folder $folder: $@\n";
    print "-- Messages in $folder --\n";

    # Get the message list before touching the local file, so a failure here
    # does not leave a truncated mbox behind.
    my @list = $imap->messages or die "$folder: Unable to fetch message list $@";

    open(my $mbox, '>', $file) or die "Can't open file $file: $!";
    foreach my $mess (@list) {
        my @output = $imap->fetch(($mess,'RFC822')) or die "Unable to fetch $@";
        # NOTE(review): the message text is taken from the second element of
        # the fetch result, as the original script did - confirm this matches
        # the installed Mail::IMAPClient version.
        next unless defined($output[1]);
        print {$mbox} $output[1] or die "Can't write to $file: $!";
    }
    # Make sure everything is safely on disk BEFORE anything is removed from
    # the server; buffered write errors only surface at close.
    close($mbox) or die "Can't close file $file: $!";

    ### Remove seen messages, because we don't need them anymore
    my $nrDeleted = $imap->delete_message( scalar($imap->seen) )
        or warn "Could not delete_message: $@\n";
    $nrDeleted = 0 unless defined $nrDeleted;
    print "$nrDeleted messages deleted\n";

    ### delete_message only flags the messages as deleted; the folder has to
    ### be expunged to actually remove them.
    $imap->expunge($folder) or die "Could not expunge: $@\n";

    ### The requested folder has been handled, no need to look at the rest.
    last;
}
warn "Folder $dest not found on server, nothing exported\n" unless $found;

$imap->disconnect() or die "Unable to disconnect\n";
print "export of imap folder to mbox format finished\n";
Code: Select all
#!/bin/bash
# This script takes a mail file full of SPAM and sa-learns it for you.
# sa-learn apparently will not split the mails apart to learn them, so this
# script splits the mails in the mail file apart, runs each of them through
# "spamassassin -d" to remove the markup, and feeds them to "sa-learn --spam".
#
# Usage: pass the mail file as the first argument, or change the default here.

# This is the file with the spam you need to sa-learn.
spamfile='/usr/local/data/spam.mbox'
# Override if one was specified on the command line.
if [[ -n "$1" ]]; then spamfile=$1; fi

# Temp directory:
tmpdr="/tmp/"

if [[ ! -r "$spamfile" ]]; then
    echo "Can't read $spamfile ... does it exist?" >&2
    exit 1
fi
echo "Learning SPAM in $spamfile . . ."

# Work on a private copy, so we are fine if the original changes while we are
# processing it. (TODO: implement locking?)
# mktemp creates the files atomically with unpredictable names, which avoids
# collisions and symlink tricks in a world-writable directory.
spamcopy=$(mktemp "${tmpdr}spam.XXXXXX") || exit 1
# tmpfile will hold the line numbers on which a new email starts.
tmpfile=$(mktemp "${tmpdr}tmp.XXXXXX") || { rm -f "$spamcopy"; exit 1; }
# Remove both temporary files however the script ends.
trap 'rm -f "$spamcopy" "$tmpfile"' EXIT

cp -- "$spamfile" "$spamcopy" || exit 1
spamfile=$spamcopy

# This is the regular expression taken from grepmail.
# CREDIT: Written by David Coppit (david@coppit.org, http://coppit.org/)
# The pattern must stay on ONE line: a newline inside a grep pattern separates
# alternative patterns and would break the expression.
grep --extended-regexp --line-number "^(Return-Path: .*|X-Draft-From: .*|X-From-Line: .*|From [^:]+(:[0-9][0-9]){1,2} ([A-Z]{2,3} [0-9]{4}|[0-9]{4} [+-][0-9]{4}|[0-9]{4})( remote from .*)?)\$" "$spamfile" | sed "s/:.*//" > "$tmpfile"

# nummails will have the number of emails:
nummails=$(grep -c . "$tmpfile")
echo "$nummails message(s) . . ."

# Without any message start line there is nothing to split; do not feed the
# whole file to sa-learn as if it were one message.
if (( nummails == 0 )); then
    echo "No messages found in the mail file, nothing learned." >&2
    exit 0
fi

# Now we can separate out the emails and work on them.
# Message x spans from its own start line up to (not including) the start
# line of message x+1.
for ((x=1; x<nummails; x++)); do
    linea=$(awk -v a="$x" -- '{ if (FNR == a) print }' < "$tmpfile")
    lineb=$(awk -v a="$((x+1))" -- '{ if (FNR == a) print }' < "$tmpfile")
    awk -v a="$linea" -v b="$lineb" -- '{ if ((FNR>=a)&&(FNR<b)) print }' < "$spamfile" | spamassassin -d | sa-learn --spam
done

# After the loop x equals nummails: the last message runs to the end of file.
linea=$(awk -v a="$x" -- '{ if (FNR == a) print }' < "$tmpfile")
awk -v a="$linea" -- '{ if (FNR>=a) print }' < "$spamfile" | spamassassin -d | sa-learn --spam
Code: Select all
# Add your own customisations to this file. See 'man Mail::SpamAssassin::Conf'
# for details of what can be tweaked.
#
# Do not change the subject of messages detected as spam.
# To tag the subject instead, give a replacement text, e.g.:
#   rewrite_header Subject ****SPAM(_SCORE_)****
rewrite_header Subject
# Set the score required before a mail is considered spam.
required_score 3.50
# Encapsulate spam in an attachment (0=no, 1=yes, 2=safe)
report_safe 1
# Enable the Bayes system
use_bayes 1
# Enable Bayes auto-learning
bayes_auto_learn 1
# Enable or disable network checks
skip_rbl_checks 0
use_razor2 1
use_dcc 1
use_pyzor 1
# Mail written in the languages listed here (by language code) will not be
# marked as being possibly spam in a foreign language.
# - nl = Dutch, en = English, fr = French, de = German
ok_languages nl en fr de
# Mail using the locales listed here (by locale code) will not be marked
# as being possibly spam in a foreign language.
ok_locales en
Code: Select all
vmsrv-scalix:~ # sa-learn --dump magic
0.000 0 3 0 non-token data: bayes db version
0.000 0 100 0 non-token data: nspam
0.000 0 617 0 non-token data: nham
0.000 0 74497 0 non-token data: ntokens
0.000 0 1119868955 0 non-token data: oldest atime
0.000 0 1138816237 0 non-token data: newest atime
0.000 0 0 0 non-token data: last journal sync atime
0.000 0 0 0 non-token data: last expiry atime
0.000 0 0 0 non-token data: last expire atime delta
0.000 0 0 0 non-token data: last expire reduction count
ah4279 wrote:We are just transitioning from a WinBloze based mail server to FC4/Scalix and we are getting a lot of spam. Using the out of box spamassassin rules and scores we are catching ~60% of the spam messages.
I would like my users to save the spam messages so that I can eventually train spamassassin with sa-learn after we have built up a large set of spam.
My question is, how do I go about getting the spam and ham messages from a users account into a format that spamassassin will train from?
Any suggestions?
Code: Select all
# Select $folder on the server; if the select fails, abort the whole script
# with the error text from $@.
$imap->select($folder) or die "Unable to select folder $@\n";
Code: Select all
# Select $folder on the server; if the select fails, skip this folder and
# continue with the next iteration of the enclosing loop.
$imap->select($folder) or next;
Users browsing this forum: No registered users and 2 guests