1 | #!/usr/bin/perl -w |
---|
2 | #Syn-3 spam learning cronjob - (C) 2005 DatuX |
---|
3 | #Edwin Eefting |
---|
4 | |
---|
5 | use strict; |
---|
#directory holding one cyrus maildir per user, and the two state files
#this script reads at startup and rewrites at the end of a normal run
my $maildir       = '/home/system/cyrus-imap/maildir/user';
my $mailcachefile = '/home/system/cyrus-dspam/mails.cache';
my $learnedfile   = '/home/system/cyrus-dspam/learn.cache';

#any true first command line argument switches on test mode
my $test = $ARGV[0] ? 1 : 0;
print "TEST MODE - Not changing anything!\n" if $test;
---|
16 | |
---|
17 | |
---|
#read mailcache, so that we know what's processed
#the hash is keyed by the full path of a mail file, the values mean:
# 1=processed before, not checked this time after processing
# 2=processed before, and still there
my %mailcache;
my $mailcache_fh;
if (open($mailcache_fh, '<', $mailcachefile))
{
    while (my $cached_mail = <$mailcache_fh>)
    {
        chomp($cached_mail);
        #an empty line can never be the path of a mail
        next if ($cached_mail eq '');
        $mailcache{$cached_mail} = 1;
    }
    close($mailcache_fh);
}
else
{
    #a missing cache is normal on the very first run, we just start empty;
    #a cache that exists but can't be read deserves a warning
    my $open_error = $!;
    if (-e $mailcachefile)
    {
        warn "Cannot read $mailcachefile: $open_error\n";
    }
}
---|
29 | |
---|
#read learning list, so that we don't learn something twice and
#can unlearn stuff as well
#file format: one "signature=time" pair per line, kept in %learned as
#signature => time
my %learned;
my $learned_fh;
if (open($learned_fh, '<', $learnedfile))
{
    while (my $line = <$learned_fh>)
    {
        chomp($line);
        my ($signature, $time) = split(/=/, $line, 2);
        #skip empty or damaged lines instead of storing undefined values
        next if (!defined($signature) || $signature eq '' || !defined($time));
        $learned{$signature} = $time;
        #$learned{$signature}{'mode'}=$mode;
    }
    close($learned_fh);
}
else
{
    #a missing list is normal on the very first run, we just start empty;
    #a list that exists but can't be read deserves a warning
    my $open_error = $!;
    if (-e $learnedfile)
    {
        warn "Cannot read $learnedfile: $open_error\n";
    }
}
---|
42 | |
---|
43 | |
---|
#learn by correcting a message with dspam
# $class     - class the message is corrected to ("spam" or "innocent")
# $user      - user name handed to dspam with --user
# $signature - dspam signature of the message
#A signature that is already in %learned is not sent to dspam again, but its
#timestamp is refreshed. A signature is only recorded in %learned when dspam
#exited successfully.
#Returns 1 when the signature is (now) recorded as learned, 0 otherwise.
sub Learn
{
    my ($class, $user, $signature) = @_;

    #without a signature dspam has nothing to correct
    if (!defined($signature) || $signature eq '')
    {
        print "(Skipping, no signature)\n";
        return 0;
    }

    if (exists($learned{$signature}))
    {
        print "(Skipping, already learned)\n";
    }
    else
    {
        #list form of system: no shell is involved, so the arguments reach
        #dspam exactly as they are and don't need any quoting or mangling
        my $status = system(
            'dspam', '--mode=teft', '--source=error',
            '--user', $user,
            "--signature=$signature",
            "--class=$class"
        );
        if ($status != 0)
        {
            warn "dspam failed for signature $signature of user $user (status $status)\n";
            return 0;
        }
    }
    $learned{$signature} = time();
    return 1;
}
---|
61 | |
---|
62 | |
---|
#walk all users that have a Spam folder and correct dspam for every not yet
#processed mail whose X-DSPAM-Result header disagrees with the folder it is in:
# - result is not Spam, but the mail is in the Spam folder:
#   learn as spam, delete the mail and reconstruct the Spam mailbox afterwards
# - result is Spam, but the mail is outside the Spam folder:
#   learn as innocent
#Every mail that is found is marked with 2 in %mailcache.
#find is started without a shell (list form of open), so strange characters
#in user or folder names can't end up in a command line.
open(my $users_fh, '-|', 'find', $maildir, '-mindepth', '1', '-maxdepth', '1', '-printf', '%f\n')
    or die "Cannot list users in $maildir: $!\n";
while (my $user = <$users_fh>)
{
    chomp($user);
    my $userdir = "$maildir/$user";
    if (-e "$userdir/Spam")
    {
        print "Processing user $user\n";
    }
    else
    {
        print "Skipping user $user (no Spam folder)\n";
        next;
    }

    #find all mails, but skip trash and sent mails
    my $mails_fh;
    if (!open($mails_fh, '-|', 'find', $userdir, '-type', 'f', '-name', '*.'))
    {
        warn "Cannot list mails of $user: $!\n";
        next;
    }
    my $reconstruct = 0;
    while (my $mail = <$mails_fh>)
    {
        chomp($mail);
        #folder tests are done on the path below the users directory, so
        #that the user name itself can never look like a folder
        my $folderpath = substr($mail, length($userdir));
        next if ($folderpath =~ m{/Trash/});
        my $in_spamfolder = ($folderpath =~ m{^/Spam/}) ? 1 : 0;

        #not yet processed?
        #TODO: only process if the mail is in the folder for more than 24 hours?
        #(in case the user made a mistake and can move it back within 24 hours)
        if (!$mailcache{$mail})
        {
            #read first 5k of mail
            my $maildata = '';
            my $mail_fh;
            if (!open($mail_fh, '<', $mail))
            {
                #not marked as processed, so it is tried again on the next run
                warn "Cannot read $mail: $!\n";
                next;
            }
            if (!defined(read($mail_fh, $maildata, 5000)))
            {
                $maildata = '';
            }
            close($mail_fh);

            #only trust the header section, which ends at the first empty
            #line; if there is none in the first 5k, everything read is header
            my ($headers) = ($maildata =~ /\A(.*?\r\n)\r\n/s);
            if (!defined($headers))
            {
                $headers = $maildata;
            }
            #the greedy .* selects the last occurrence inside the headers
            my ($spamresult) = ($headers =~ /.*\nX-DSPAM-Result: ([a-zA-Z]*)\r/s);
            my ($signature)  = ($headers =~ /.*\nX-DSPAM-Signature: ([0-9a-zA-Z]*)\r/s);

            #without a signature there is nothing dspam could learn from, so
            #such a mail is left alone instead of being deleted unlearned
            if ($spamresult && defined($signature) && $signature ne '')
            {
                if ($test)
                {
                    print "Test: Found $mail ($signature) result $spamresult\n";
                }
                #is it NOT spam, in the spamfolder?
                if ($spamresult ne 'Spam' && $in_spamfolder)
                {
                    if ($test)
                    {
                        print "Test: Would have corrected to SPAM: $mail ($signature)\n";
                    }
                    else
                    {
                        #learn as spam
                        print "Correcting to SPAM: $mail ($signature)\n";
                        Learn("spam", $user, $signature);
                        #delete it
                        if (unlink($mail))
                        {
                            #mailbox needs reconstruction because of our action
                            $reconstruct = 1;
                        }
                        else
                        {
                            warn "Cannot delete $mail: $!\n";
                        }
                    }
                }
                #is it spam, but NOT in spamfolder?
                elsif ($spamresult eq 'Spam' && !$in_spamfolder)
                {
                    if ($test)
                    {
                        print "Test: Would have corrected to INNOCENT: $mail ($signature)\n";
                    }
                    else
                    {
                        #learn as innocent
                        print "Correcting to INNOCENT: $mail ($signature)\n";
                        Learn("innocent", $user, $signature);
                    }
                }
                #other combinations: the result agrees with the folder
                #TODO: program could be extended to UNLEARN mails
            }
            else
            {
                #unknown, just ignore it to be safe
                if ($test)
                {
                    print "Test: Cant find headers in $mail?\n";
                }
            }
        }
        #mark as found and processed
        $mailcache{$mail} = 2;
    }
    close($mails_fh);

    if (!$test && $reconstruct)
    {
        print "Reconstructing spamfolder of $user...\n";
        #su hands the command to a shell, so the mailbox name is single
        #quoted, with embedded single quotes escaped as '\''
        (my $mailbox = "user.$user.Spam") =~ s/'/'\\''/g;
        my $status = system('su', '-', 'cyrus', '-c', "/usr/cyrus/bin/reconstruct '$mailbox'");
        if ($status != 0)
        {
            warn "Reconstructing spamfolder of $user failed (status $status)\n";
        }
    }
}
close($users_fh);
---|
161 | |
---|
162 | |
---|
if ($test)
{
    print "Test mode, not storing caches\n";
    exit;
}

#write the given lines to $file, one per line
#The data goes to "$file.new" first and is renamed over $file only after it
#was written and closed successfully, so a failed write never truncates the
#old file. Note that the rename replaces $file by a newly created file.
#Returns 1 on success, 0 (after a warning) on failure.
sub WriteCacheFile
{
    my ($file, @lines) = @_;
    my $tmpfile = "$file.new";
    my $fh;
    if (!open($fh, '>', $tmpfile))
    {
        warn "Cannot write $tmpfile: $!\n";
        return 0;
    }
    foreach my $line (@lines)
    {
        if (!print {$fh} "$line\n")
        {
            warn "Cannot write $tmpfile: $!\n";
            close($fh);
            unlink($tmpfile);
            return 0;
        }
    }
    #buffered write errors only show up when closing
    if (!close($fh))
    {
        warn "Cannot write $tmpfile: $!\n";
        unlink($tmpfile);
        return 0;
    }
    if (!rename($tmpfile, $file))
    {
        warn "Cannot rename $tmpfile to $file: $!\n";
        unlink($tmpfile);
        return 0;
    }
    return 1;
}

#both files are always attempted, so a problem with one of them doesn't
#lose the other one
my $store_failed = 0;

#store new mailcache
#2=mail processed this time; entries still at 1 were not found again and
#are dropped from the cache
my @processed_mails = sort(grep { $mailcache{$_} == 2 } keys(%mailcache));
WriteCacheFile($mailcachefile, @processed_mails) or $store_failed = 1;

#store new learned list
#TODO:remove old entries
my @learned_lines = map { "$_=$learned{$_}" } sort(keys(%learned));
WriteCacheFile($learnedfile, @learned_lines) or $store_failed = 1;

#let cron know that something went wrong
exit(1) if $store_failed;
---|
190 | |
---|
191 | |
---|