|
|
(3 intermediate revisions by one other user not shown) |
Line 1: |
Line 1: |
− | Spamassassin's Bayesian database needs a balanced supply of both Spam and Ham in order to function properly.
| + | I've moved this to [[HowTos/SpamAssassin|SpamAssassin How-To]]. Many thanks for your Contribution. Florian. |
− | By feeding in false positives as Ham, and feeding false negatives as spam, we can keep the bayes database up to date.
| + | |
− | Spamassassin also provides a facility to report spam to various anti-spam sites such as Razor, Pyzor and SpamCop.
| + | |
− | Using the mboxadmin facility of Scalix, we can automate this task quite easily.
| + | |
− | However, we need to be careful about what we feed into the bayes. We can't always trust our users to put spam into the right folders, and we can't expect them to hand-feed ham into our bayes. Many people use a public folder for their spam. This allows everyone to dump their false-negatives into a single folder, and automatically feed it into the bayes. Unfortunately, this doesn't allow for feeding it ham as well, and bayes needs a balanced diet. The other problem with public folders is that they are just that - public. We can't expect users to place ham into a public folder for all to see.
| + | |
− | Here is a method for ensuring your bayes gets fed a proper balanced diet, and only ham gets fed in as ham, and only spam gets fed in as spam.
| + | |
− | <BR>
| + | |
− | Firstly, create an account which has mboxadmin privileges.
| + | |
− | <BR>
| + | |
− | Set up two cron jobs on your server. Run this script every hour:
| + | |
− | <BR>
| + | |
− | <pre>
| + | |
− | #!/usr/local/bin/perl
| + | |
− | # Hourly job: for every mailbox listed by omshowu, learn the contents of the "Junk E-mail" folder as spam, report each message, then delete it.
| + | |
− | use strict;
| + | |
− | use warnings;
| + | |
− | use Mail::IMAPClient;
| + | |
− | my $host="your_mail_server_ip";
| + | |
− | my $username="mboxadmin_user_name";
| + | |
− | my $password="mboxadmin_password";
| + | |
− | # Both pipelines first pass the message through "spamassassin -d", which removes any existing SpamAssassin markup.
| + | |
− | my $learn_spam="/usr/bin/spamassassin -d | /usr/bin/sa-learn --spam";
| + | |
− | my $report_spam="/usr/bin/spamassassin -d | /usr/bin/spamassassin -r";
| + | |
− | # Write one message to a shell pipeline. Returns 1 if the pipeline started and exited cleanly, 0 (after a warning) otherwise.
| + | |
− | sub pipe_message
| + | |
− | {
| + | |
−     my ($command,$message)=@_;
| + | |
−     my $pipe;
| + | |
−     unless (open($pipe,'|-',$command))
| + | |
−     {
| + | |
−         warn "Cannot run '$command': $!\n";
| + | |
−         return 0;
| + | |
−     }
| + | |
−     print $pipe $message;
| + | |
−     unless (close($pipe)) # close() on a pipe also fails when the command exits non-zero.
| + | |
−     {
| + | |
−         warn "'$command' failed: ".($! ? $! : "exit status $?")."\n";
| + | |
−         return 0;
| + | |
−     }
| + | |
−     return 1;
| + | |
− | }
| + | |
− | my @real_users=`/opt/scalix/bin/omshowu -m all -i`; # get all real user names.
| + | |
− | foreach my $punter (@real_users) # Loop over them all.
| + | |
− | {
| + | |
−     chomp $punter; # Remove trailing newline.
| + | |
−     next unless length $punter; # Skip blank lines in the omshowu output.
| + | |
−     print "$punter\n"; # Some output. Feel free to remove.
| + | |
−     my $user="mboxadmin:$username:$punter"; # Set up superuser login.
| + | |
−     my $imap=Mail::IMAPClient->new('Server' => $host, 'User' => $user, 'Password' => $password); # Connect to server.
| + | |
−     unless ($imap)
| + | |
−     {
| + | |
−         warn "Cannot connect as $punter: $@\n";
| + | |
−         next;
| + | |
−     }
| + | |
−     foreach my $folder ($imap->folders) # Look through each folder.
| + | |
−     {
| + | |
−         next unless defined $folder;
| + | |
−         next unless lc($folder) eq "junk e-mail"; # Only the "Junk E-mail" folder is processed.
| + | |
−         print "Found a spam folder: $folder\n";
| + | |
−         $imap->select($folder) or next; # Select the folder.
| + | |
−         print "Folder $folder selected.\n";
| + | |
−         my @list=$imap->messages or next; # List all messages in folder.
| + | |
−         print scalar(@list)." messages in folder.\n";
| + | |
−         foreach my $msg (reverse(@list)) # Loop over them all.
| + | |
−         {
| + | |
−             my $email=$imap->message_string($msg); # Fetch the complete message text rather than picking it out of the raw FETCH response.
| + | |
−             unless (defined $email)
| + | |
−             {
| + | |
−                 warn "Cannot fetch message $msg from $folder\n";
| + | |
−                 next;
| + | |
−             }
| + | |
−             pipe_message($learn_spam,$email) or next; # Not learned: keep the message so the next run can retry.
| + | |
−             pipe_message($report_spam,$email); # Report it. A failed report is only warned about.
| + | |
−             $imap->delete_message($msg) or warn "Cannot delete message $msg from $folder\n"; # Delete it.
| + | |
−         }
| + | |
−         $imap->expunge($folder) or warn "Cannot expunge $folder\n"; # Expunge folder.
| + | |
−     }
| + | |
−     $imap->logout; # Close this user's connection before moving on to the next one.
| + | |
− | }
| + | |
− | | + | |
− | | + | |
− | | + | |
− | </pre>
| + | |
− | | + | |
− | | + | |
− | <BR>
| + | |
− | And this one every week:
| + | |
− | <BR>
| + | |
− | <pre>
| + | |
− | #!/usr/bin/perl
| + | |
− | # Weekly job: for every mailbox listed by omshowu, learn read Inbox mail and the "Non-Spam" folder as ham, and learn, report and delete week-old mail in the "Possible Spam" and "Spam" folders.
| + | |
− | use strict;
| + | |
− | use warnings;
| + | |
− | use Mail::IMAPClient;
| + | |
− | my $host="your_server_ip_address";
| + | |
− | my $username="mboxadmin_user_name";
| + | |
− | my $password="mboxadmin_password";
| + | |
− | my $max_ham=100; # Learn at most this many Inbox messages per user - we don't want the entire inbox.
| + | |
− | my $one_week=604800; # Seconds in seven days.
| + | |
− | # Every pipeline first passes the message through "spamassassin -d", which removes any existing SpamAssassin markup.
| + | |
− | my $learn_spam="/usr/bin/spamassassin -d | /usr/bin/sa-learn --spam";
| + | |
− | my $learn_ham="/usr/bin/spamassassin -d | /usr/bin/sa-learn --ham";
| + | |
− | my $forget="/usr/bin/spamassassin -d | /usr/bin/sa-learn --forget";
| + | |
− | my $report_spam="/usr/bin/spamassassin -d | /usr/bin/spamassassin -r";
| + | |
− | # Write one message to a shell pipeline. Returns 1 if the pipeline started and exited cleanly, 0 (after a warning) otherwise.
| + | |
− | sub pipe_message
| + | |
− | {
| + | |
−     my ($command,$message)=@_;
| + | |
−     my $pipe;
| + | |
−     unless (open($pipe,'|-',$command))
| + | |
−     {
| + | |
−         warn "Cannot run '$command': $!\n";
| + | |
−         return 0;
| + | |
−     }
| + | |
−     print $pipe $message;
| + | |
−     unless (close($pipe)) # close() on a pipe also fails when the command exits non-zero.
| + | |
−     {
| + | |
−         warn "'$command' failed: ".($! ? $! : "exit status $?")."\n";
| + | |
−         return 0;
| + | |
−     }
| + | |
−     return 1;
| + | |
− | }
| + | |
− | # Fetch the complete text of one message from the selected folder. Returns undef (after a warning) if the fetch fails.
| + | |
− | sub fetch_message
| + | |
− | {
| + | |
−     my ($imap,$folder,$msg)=@_;
| + | |
−     my $email=$imap->message_string($msg);
| + | |
−     warn "Cannot fetch message $msg from $folder\n" unless defined $email;
| + | |
−     return $email;
| + | |
− | }
| + | |
− | # Learn, report and delete each listed message in the selected folder, then expunge the folder.
| + | |
− | # When $skip_tagged is true, messages whose subject contains "[SPAM]" are reported and deleted but not learned
| + | |
− | # (presumably they were already tagged by SpamAssassin - confirm against your SpamAssassin configuration).
| + | |
− | sub purge_spam
| + | |
− | {
| + | |
−     my ($imap,$folder,$skip_tagged,@list)=@_;
| + | |
−     foreach my $msg (reverse(@list)) # Loop over them all.
| + | |
−     {
| + | |
−         my $email=fetch_message($imap,$folder,$msg);
| + | |
−         next unless defined $email;
| + | |
−         my $learn=1;
| + | |
−         if ($skip_tagged)
| + | |
−         {
| + | |
−             my $subject=$imap->subject($msg); # Fetch subject for message.
| + | |
−             $subject='' unless defined $subject; # A message without a subject is treated as untagged.
| + | |
−             if ($subject=~m/\[SPAM\]/)
| + | |
−             {
| + | |
−                 $learn=0;
| + | |
−             }
| + | |
−             else
| + | |
−             {
| + | |
−                 print "Learning message with subject: $subject\n";
| + | |
−             }
| + | |
−         }
| + | |
−         if ($learn)
| + | |
−         {
| + | |
−             pipe_message($learn_spam,$email) or next; # Not learned: keep the message so the next run can retry.
| + | |
−         }
| + | |
−         pipe_message($report_spam,$email); # Report it. A failed report is only warned about.
| + | |
−         $imap->delete_message($msg) or warn "Cannot delete message $msg from $folder\n"; # Delete it.
| + | |
−     }
| + | |
−     $imap->expunge($folder) or warn "Cannot expunge $folder\n"; # Expunge folder.
| + | |
−     return;
| + | |
− | }
| + | |
− | my @real_users=`/opt/scalix/bin/omshowu -m all -i`; # get all real user names.
| + | |
− | foreach my $punter (@real_users) # Loop over them all.
| + | |
− | {
| + | |
−     chomp $punter; # Remove trailing newline.
| + | |
−     next unless length $punter; # Skip blank lines in the omshowu output.
| + | |
−     print "$punter\n"; # Some output. Feel free to remove.
| + | |
−     my $user="mboxadmin:$username:$punter"; # Set up superuser login.
| + | |
−     my $imap=Mail::IMAPClient->new('Server' => $host, 'User' => $user, 'Password' => $password); # Connect to server.
| + | |
−     unless ($imap)
| + | |
−     {
| + | |
−         warn "Cannot connect as $punter: $@\n";
| + | |
−         next;
| + | |
−     }
| + | |
−     foreach my $folder ($imap->folders) # Look through each folder.
| + | |
−     {
| + | |
−         next unless defined $folder;
| + | |
−         my $name=lc($folder); # Folder names are matched case-insensitively.
| + | |
−         if ($name eq "inbox") # Read mail left in the Inbox is treated as ham.
| + | |
−         {
| + | |
−             print "Inbox found.\n"; # Some debug output.
| + | |
−             $imap->select($folder) or next; # Select folder.
| + | |
−             print "Folder $folder selected.\n";
| + | |
−             my @list=$imap->seen or next; # Get only messages which have been read. Reduces the chance of learning unreviewed spam as ham, and stops us interfering with people's unread mail.
| + | |
−             print scalar(@list)." messages in folder.\n";
| + | |
−             my $counter=0; # Number of messages learned so far.
| + | |
−             foreach my $msg (@list) # Loop over each message.
| + | |
−             {
| + | |
−                 last if ($counter>=$max_ham); # Stop at exactly $max_ham messages.
| + | |
−                 my $email=fetch_message($imap,$folder,$msg);
| + | |
−                 next unless defined $email;
| + | |
−                 pipe_message($learn_ham,$email) or next; # Feed it to sa-learn as ham.
| + | |
−                 $counter+=1; # Count only messages that were actually learned.
| + | |
−             }
| + | |
−         }
| + | |
−         elsif ($name eq "possible spam" or $name eq "spam") # "Possible Spam" and "Spam" folders.
| + | |
−         {
| + | |
−             print "Found a spam folder: $folder\n";
| + | |
−             $imap->select($folder) or next; # Select the folder.
| + | |
−             print "Folder $folder selected.\n";
| + | |
−             my $lastweek=time()-$one_week; # Get timestamp for this time last week.
| + | |
−             my @list=$imap->before($lastweek) or next; # List all messages older than that.
| + | |
−             print scalar(@list)." messages in folder.\n";
| + | |
−             my $skip_tagged=($name eq "spam") ? 1 : 0; # Only the "Spam" folder skips learning of "[SPAM]"-tagged subjects.
| + | |
−             purge_spam($imap,$folder,$skip_tagged,@list);
| + | |
−         }
| + | |
−         elsif ($name eq "non-spam") # "Non-Spam" folder: messages here are learned as ham and left in place.
| + | |
−         {
| + | |
−             $imap->select($folder) or next; # Select the folder.
| + | |
−             print "Folder $folder selected.\n";
| + | |
−             my @list=$imap->messages or next; # List all messages in folder.
| + | |
−             print scalar(@list)." messages in folder.\n";
| + | |
−             foreach my $msg (reverse(@list)) # Loop over them all.
| + | |
−             {
| + | |
−                 my $email=fetch_message($imap,$folder,$msg);
| + | |
−                 next unless defined $email;
| + | |
−                 pipe_message($forget,$email); # Sa-learn forget this message if already seen. A failure is only warned about.
| + | |
−                 pipe_message($learn_ham,$email); # Feed to sa-learn as ham.
| + | |
−             }
| + | |
−         }
| + | |
−     }
| + | |
−     $imap->logout; # Close this user's connection before moving on to the next one.
| + | |
− | }
| + | |
− | | + | |
− | </pre>
| + | |
− | <br>
| + | |
− | When I come back, I'll tell you what it does and why I've done it that way.
| + | |