#!/usr/bin/perl -w
#Syn-3 spam learning cronjob - (C) 2005 DatuX
#Edwin Eefting
#
# Walks every user directory below $maildir that has a Spam folder and
# compares the X-DSPAM-Result header of each not-yet-processed mail with the
# folder the mail is stored in:
#   - result is not "Spam" but the mail is in the user's Spam folder:
#     corrected to class "spam" via dspam, then the mail file is deleted
#   - result is "Spam" but the mail is outside the Spam folder:
#     corrected to class "innocent" via dspam
# Processed mail paths and learned signatures are kept in two cache files so
# that nothing is handled twice.
#
# Usage: give any true first argument to run in test mode; test mode only
# reports what would be done and does not touch mails or caches.

use strict;

my $maildir="/home/system/cyrus-imap/maildir/user";
my $mailcachefile="/home/system/cyrus-dspam/mails.cache";
my $learnedfile="/home/system/cyrus-dspam/learn.cache";

my $test=0;
if ($ARGV[0]) {
    $test=1;
    print "TEST MODE - Not changing anything!\n";
}

# Open a cache file for reading.
# Returns a filehandle, or nothing when the file does not exist yet (first
# run), in which case the caller simply starts with an empty cache.
# Dies when the file exists but cannot be opened.
sub OpenCache {
    my ($file)=@_;
    return unless -e $file;
    open(my $fh,'<',$file) or die "Cannot read $file: $!\n";
    return $fh;
}

# Write the given lines (without trailing newline) to $file, replacing it.
# Returns 1 on success, 0 (after a warning) on failure.
sub StoreLines {
    my ($file,@lines)=@_;
    my $fh;
    if (!open($fh,'>',$file)) {
        warn "Cannot write $file: $!\n";
        return 0;
    }
    print $fh map { "$_\n" } @lines;
    # buffered write errors only show up at close time
    if (!close($fh)) {
        warn "Cannot finish writing $file: $!\n";
        return 0;
    }
    return 1;
}

# Quote a string so a POSIX shell sees it as one literal word.
sub ShellQuote {
    my ($text)=@_;
    $text=~ s/'/'\\''/g;
    return "'$text'";
}

#read mailcache, so that we know what's processed
#1=processed before, not seen (yet) during this run
#2=processed before or during this run, and still there
my %mailcache;
if (my $cache_fh=OpenCache($mailcachefile)) {
    while (my $line=<$cache_fh>) {
        chomp($line);
        $mailcache{$line}=1;
    }
    close($cache_fh);
}

#read learning list, so that we don't learn something twice and
#can unlearn stuff as well
#format: one "signature=time" entry per line
my %learned;
if (my $learned_fh=OpenCache($learnedfile)) {
    while (my $line=<$learned_fh>) {
        chomp($line);
        my ($signature,$time)=split(/=/,$line);
        #skip empty or damaged lines instead of storing undef entries
        next unless defined($signature) && length($signature) && defined($time);
        $learned{$signature}=$time;
    }
    close($learned_fh);
}

#learn by correcting a message with dspam
# $class:     class to pass to dspam ("spam" or "innocent")
# $user:      dspam user the signature belongs to
# $signature: value of the mail's X-DSPAM-Signature header
# dspam is only called for signatures that are not in %learned yet; the
# signature is recorded with the current time in either case.
sub Learn {
    my ($class,$user,$signature)=@_;

    if (!exists($learned{$signature})) {
        #list form: no shell involved, so no quoting problems with odd names
        my @command=(
            'dspam','--mode=teft','--source=error',
            '--user',$user,
            "--signature=$signature",
            "--class=$class",
        );
        if (system(@command)!=0) {
            warn "dspam failed for user $user, signature $signature (status $?)\n";
        }
    }
    else {
        print "(Skipping, already learned)\n";
    }
    $learned{$signature}=time();
}

#every entry directly below $maildir is a candidate user
opendir(my $users_dh,$maildir) or die "Cannot read $maildir: $!\n";
my @users=sort grep { $_ ne '.' && $_ ne '..' } readdir($users_dh);
closedir($users_dh);

foreach my $user (@users) {
    my $userdir="$maildir/$user";

    if (-e "$userdir/Spam") {
        print "Processing user $user\n";
    }
    else {
        print "Skipping user $user (no Spam folder)\n";
        next;
    }

    #find all mail files (names ending in a dot) of this user
    my $mails_fh;
    if (!open($mails_fh,'-|','find',$userdir,'-type','f','-name','*.')) {
        warn "Cannot run find on $userdir: $!\n";
        next;
    }

    my $reconstruct=0;
    while (my $mail=<$mails_fh>) {
        chomp($mail);

        #folder tests look at the path below the user directory only, so a
        #user name containing "Spam" or "Trash" cannot influence them
        my $folderpath=substr($mail,length($userdir)+1);

        #skip trash
        next if $folderpath=~ m{(?:^|/)Trash/};

        #not yet processed?
        #TODO: only process if the mail is in the folder for more than 24 hours?
        #(in case the user made a mistake and can move it back within 24 hours)
        if (!$mailcache{$mail}) {
            #read first 5k of mail
            my $maildata='';
            my $mail_fh;
            if (!open($mail_fh,'<',$mail)) {
                #not marked as processed, so it is looked at again next run
                warn "Cannot read $mail: $!\n";
                next;
            }
            read($mail_fh,$maildata,5000);
            close($mail_fh);
            $maildata='' unless defined($maildata);

            #only trust headers: everything before the first empty line
            my ($headers)=split(/\r?\n\r?\n/,$maildata,2);
            $headers='' unless defined($headers);

            #when a header occurs more than once, the last one is used
            my @results=($headers=~ /^X-DSPAM-Result: ([a-zA-Z]*)\r?$/mg);
            my @signatures=($headers=~ /^X-DSPAM-Signature: ([0-9a-zA-Z]*)\r?$/mg);
            my $spamresult=$results[-1];
            my $signature=$signatures[-1];

            my $inspamfolder=($folderpath=~ m{^Spam/}) ? 1 : 0;

            #without a signature there is nothing dspam could correct
            if ($spamresult && defined($signature) && length($signature)) {
                if ($test) {
                    print "Test: Found $mail ($signature) result $spamresult\n";
                }

                #is it NOT spam, in the spamfolder?
                if ($spamresult ne 'Spam' && $inspamfolder) {
                    if ($test) {
                        print "Test: Would have corrected to SPAM: $mail ($signature)\n";
                    }
                    else {
                        #learn as spam
                        print "Correcting to SPAM: $mail ($signature)\n";
                        Learn("spam",$user,$signature);
                        #delete it
                        if (unlink($mail)) {
                            #mailbox needs reconstruction because of our action
                            $reconstruct=1;
                        }
                        else {
                            warn "Cannot delete $mail: $!\n";
                        }
                    }
                }
                #is it spam, but NOT in spamfolder?
                elsif ($spamresult eq 'Spam' && !$inspamfolder) {
                    if ($test) {
                        print "Test: Would have corrected to INNOCENT: $mail ($signature)\n";
                    }
                    else {
                        #learn as innocent
                        print "Correcting to INNOCENT: $mail ($signature)\n";
                        Learn("innocent",$user,$signature);
                    }
                }
                #other combinations: result and folder agree, nothing to do
                #TODO: program could be extended to UNLEARN mails here
            }
            else {
                #unknown, just ignore it to be safe
                if ($test) {
                    print "Test: Cant find headers in $mail?\n";
                }
            }
        }

        #mark as found and processed
        $mailcache{$mail}=2;
    }
    close($mails_fh);

    if (!$test && $reconstruct) {
        print "Reconstructing spamfolder of $user...\n";
        #su hands the -c string to a shell, so the mailbox name is quoted
        my $mailbox=ShellQuote("user.$user.Spam");
        if (system('su','-','cyrus','-c',"/usr/cyrus/bin/reconstruct $mailbox")!=0) {
            warn "Reconstruct of user.$user.Spam failed (status $?)\n";
        }
    }
}

if ($test) {
    print "Test mode, not storing caches\n";
    exit;
}

#store new mailcache: only mails seen during this run (state 2) are kept,
#so entries of mails that disappeared are dropped
my $stored_mails=StoreLines(
    $mailcachefile,
    grep { $mailcache{$_}==2 } keys(%mailcache)
);

#store new learned list
#TODO:remove old entries
my $stored_learned=StoreLines(
    $learnedfile,
    map { "$_=$learned{$_}" } keys(%learned)
);

exit(($stored_mails && $stored_learned) ? 0 : 1);