1 | /* $Id: dspam_stats.c,v 1.36 2011/06/28 00:13:48 sbajic Exp $ */ |
---|
2 | |
---|
3 | /* |
---|
4 | DSPAM |
---|
5 | COPYRIGHT (C) 2002-2012 DSPAM PROJECT |
---|
6 | |
---|
7 | This program is free software: you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU Affero General Public License as |
---|
9 | published by the Free Software Foundation, either version 3 of the |
---|
10 | License, or (at your option) any later version. |
---|
11 | |
---|
12 | This program is distributed in the hope that it will be useful, |
---|
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | GNU Affero General Public License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Affero General Public License |
---|
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
19 | |
---|
20 | */ |
---|
21 | |
---|
22 | #ifdef HAVE_CONFIG_H |
---|
23 | #include <auto-config.h> |
---|
24 | #endif |
---|
25 | |
---|
26 | #include <stdio.h> |
---|
27 | #include <stdlib.h> |
---|
28 | #include <string.h> |
---|
29 | #include <fcntl.h> |
---|
30 | #include <ctype.h> |
---|
31 | #include <sys/types.h> |
---|
32 | #include <sys/stat.h> |
---|
33 | #include <errno.h> |
---|
34 | #include <signal.h> |
---|
35 | #ifndef _WIN32 |
---|
36 | #include <unistd.h> |
---|
37 | #include <dirent.h> |
---|
38 | #endif |
---|
39 | #include "config.h" |
---|
40 | |
---|
41 | #include "libdspam.h" |
---|
42 | #include "read_config.h" |
---|
43 | #include "config_api.h" |
---|
44 | #include "language.h" |
---|
45 | #include "util.h" |
---|
46 | |
---|
/* One-line command synopsis.  The separator between "[-h]" and
   "[--profile=PROFILE]" is a plain '|'; "\|" is not a valid C escape. */
#define TSYNTAX "syntax: dspam_stats [-h]|[--profile=PROFILE] [-HrsSt] [user [user...]]"
---|
48 | |
---|
49 | #ifdef _WIN32 |
---|
50 | /* no trusted users under Windows */ |
---|
51 | #undef TRUSTED_USER_SECURITY |
---|
52 | #endif |
---|
53 | |
---|
54 | DSPAM_CTX *open_ctx, *open_mtx; |
---|
55 | int opt_humanfriendly; |
---|
56 | int opt_reset; |
---|
57 | int opt_snapshot; |
---|
58 | int opt_stats; |
---|
59 | int opt_total; |
---|
60 | |
---|
61 | int stat_user (const char *username, struct _ds_spam_totals *totals); |
---|
62 | int process_all_users (struct _ds_spam_totals *totals); |
---|
63 | void dieout (int signal); |
---|
64 | void usage (void); |
---|
65 | |
---|
66 | int |
---|
67 | main (int argc, char **argv) |
---|
68 | { |
---|
69 | int ch, i, users = 0; |
---|
70 | #ifndef HAVE_GETOPT |
---|
71 | int optind = 1; |
---|
72 | #endif |
---|
73 | struct _ds_spam_totals totals; |
---|
74 | |
---|
75 | #ifdef TRUSTED_USER_SECURITY |
---|
76 | struct passwd *p = getpwuid (getuid ()); |
---|
77 | int trusted = 0; |
---|
78 | #endif |
---|
79 | |
---|
80 | |
---|
81 | memset(&totals, 0, sizeof(struct _ds_spam_totals)); |
---|
82 | |
---|
83 | /* Read dspam.conf */ |
---|
84 | |
---|
85 | agent_config = read_config(NULL); |
---|
86 | if (!agent_config) { |
---|
87 | LOG(LOG_ERR, ERR_AGENT_READ_CONFIG); |
---|
88 | fprintf (stderr, ERR_AGENT_READ_CONFIG "\n"); |
---|
89 | exit(EXIT_FAILURE); |
---|
90 | } |
---|
91 | |
---|
92 | if (!_ds_read_attribute(agent_config, "Home")) { |
---|
93 | LOG(LOG_ERR, ERR_AGENT_DSPAM_HOME); |
---|
94 | fprintf (stderr, ERR_AGENT_DSPAM_HOME "\n"); |
---|
95 | _ds_destroy_config(agent_config); |
---|
96 | exit(EXIT_FAILURE); |
---|
97 | } |
---|
98 | |
---|
99 | if (libdspam_init(_ds_read_attribute(agent_config, "StorageDriver")) != 0) { |
---|
100 | LOG(LOG_ERR, ERR_DRV_INIT); |
---|
101 | fprintf (stderr, ERR_DRV_INIT "\n"); |
---|
102 | _ds_destroy_config(agent_config); |
---|
103 | exit(EXIT_FAILURE); |
---|
104 | } |
---|
105 | |
---|
106 | #ifdef TRUSTED_USER_SECURITY |
---|
107 | if (_ds_match_attribute(agent_config, "Trust", p->pw_name) || !p->pw_uid) { |
---|
108 | trusted = 1; |
---|
109 | } |
---|
110 | #endif |
---|
111 | |
---|
112 | for(i=0;i<argc;i++) { |
---|
113 | if (!strncmp (argv[i], "--profile=", 10)) |
---|
114 | { |
---|
115 | #ifdef TRUSTED_USER_SECURITY |
---|
116 | if (!trusted) { |
---|
117 | LOG(LOG_ERR, ERR_TRUSTED_PRIV, "--profile", p->pw_uid, p->pw_name); |
---|
118 | fprintf (stderr, ERR_TRUSTED_PRIV "\n", "--profile", p->pw_uid, p->pw_name); |
---|
119 | _ds_destroy_config(agent_config); |
---|
120 | goto BAIL; |
---|
121 | } |
---|
122 | #endif |
---|
123 | if (!_ds_match_attribute(agent_config, "Profile", argv[i]+10)) { |
---|
124 | LOG(LOG_ERR, ERR_AGENT_NO_SUCH_PROFILE, argv[i]+10); |
---|
125 | fprintf (stderr, ERR_AGENT_NO_SUCH_PROFILE "\n", argv[i]+10); |
---|
126 | _ds_destroy_config(agent_config); |
---|
127 | goto BAIL; |
---|
128 | } else { |
---|
129 | _ds_overwrite_attribute(agent_config, "DefaultProfile", argv[i]+10); |
---|
130 | } |
---|
131 | break; |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | open_ctx = open_mtx = NULL; |
---|
136 | |
---|
137 | signal (SIGINT, dieout); |
---|
138 | #ifndef _WIN32 |
---|
139 | signal (SIGPIPE, dieout); |
---|
140 | #endif |
---|
141 | signal (SIGTERM, dieout); |
---|
142 | |
---|
143 | dspam_init_driver (NULL); |
---|
144 | |
---|
145 | /* Process command line */ |
---|
146 | ch = opt_humanfriendly = 0; |
---|
147 | opt_reset = opt_snapshot = opt_stats = opt_total = 0; |
---|
148 | |
---|
149 | #ifndef HAVE_GETOPT |
---|
150 | while ( argv[optind] && |
---|
151 | argv[optind][0] == '-' && |
---|
152 | (ch = argv[optind][1]) && |
---|
153 | argv[optind][2] == '\0' ) |
---|
154 | #else |
---|
155 | while((ch = getopt(argc, argv, "hHrsS")) != -1) |
---|
156 | #endif |
---|
157 | { |
---|
158 | switch(ch) { |
---|
159 | case 'h': |
---|
160 | /* print help, and then exit. usage exits for us */ |
---|
161 | usage(); |
---|
162 | break; |
---|
163 | case 'H': |
---|
164 | opt_humanfriendly = 1; |
---|
165 | break; |
---|
166 | case 'r': |
---|
167 | opt_reset = 1; |
---|
168 | break; |
---|
169 | case 's': |
---|
170 | opt_snapshot = 1; |
---|
171 | break; |
---|
172 | case 'S': |
---|
173 | opt_stats = 1; |
---|
174 | break; |
---|
175 | case 't': |
---|
176 | opt_total = 1; |
---|
177 | break; |
---|
178 | |
---|
179 | #ifndef HAVE_GETOPT |
---|
180 | default: |
---|
181 | fprintf(stderr, "%s: unknown option \"%s\".\n", |
---|
182 | argv[0], argv[optind] + 1); |
---|
183 | usage(); |
---|
184 | #endif |
---|
185 | } |
---|
186 | #ifndef HAVE_GETOPT |
---|
187 | optind++; |
---|
188 | #endif |
---|
189 | } |
---|
190 | #ifndef HAVE_GETOPT |
---|
191 | /* reset our option array and index to where we are after getopt */ |
---|
192 | argv += optind; |
---|
193 | argc -= optind; |
---|
194 | #endif |
---|
195 | |
---|
196 | /* process arguments */ |
---|
197 | for (i=0; i < argc; i++) |
---|
198 | { |
---|
199 | if (argv[i] && strncmp(argv[i], "--", 2)) { |
---|
200 | #ifdef TRUSTED_USER_SECURITY |
---|
201 | if ( !trusted && strcmp(argv[i], p->pw_name) ) |
---|
202 | { |
---|
203 | fprintf(stderr, ERR_TRUSTED_MODE "\n"); |
---|
204 | _ds_destroy_config(agent_config); |
---|
205 | goto BAIL; |
---|
206 | } |
---|
207 | #endif |
---|
208 | stat_user(argv[i], &totals); |
---|
209 | users++; |
---|
210 | } |
---|
211 | } |
---|
212 | |
---|
213 | if (!users) |
---|
214 | { |
---|
215 | #ifdef TRUSTED_USER_SECURITY |
---|
216 | if ( !trusted ) |
---|
217 | { |
---|
218 | fprintf(stderr, ERR_TRUSTED_MODE "\n"); |
---|
219 | _ds_destroy_config(agent_config); |
---|
220 | goto BAIL; |
---|
221 | } |
---|
222 | #endif |
---|
223 | |
---|
224 | process_all_users (&totals); |
---|
225 | } |
---|
226 | |
---|
227 | if (opt_total) |
---|
228 | stat_user(NULL, &totals); |
---|
229 | dspam_shutdown_driver (NULL); |
---|
230 | _ds_destroy_config(agent_config); |
---|
231 | libdspam_shutdown(); |
---|
232 | exit (EXIT_SUCCESS); |
---|
233 | |
---|
234 | BAIL: |
---|
235 | libdspam_shutdown(); |
---|
236 | exit(EXIT_FAILURE); |
---|
237 | } |
---|
238 | |
---|
239 | int |
---|
240 | process_all_users (struct _ds_spam_totals *totals) |
---|
241 | { |
---|
242 | DSPAM_CTX *CTX; |
---|
243 | char *user; |
---|
244 | |
---|
245 | CTX = dspam_create (NULL, NULL, _ds_read_attribute(agent_config, "Home"), DSM_TOOLS, 0); |
---|
246 | open_ctx = CTX; |
---|
247 | if (CTX == NULL) |
---|
248 | { |
---|
249 | fprintf (stderr, "Could not initialize context: %s\n", strerror (errno)); |
---|
250 | return EFAILURE; |
---|
251 | } |
---|
252 | |
---|
253 | set_libdspam_attributes(CTX); |
---|
254 | if (dspam_attach(CTX, NULL)) { |
---|
255 | LOG (LOG_WARNING, "unable to attach dspam context"); |
---|
256 | fprintf (stderr, "Unable to attach DSPAM context\n"); |
---|
257 | dspam_destroy(CTX); |
---|
258 | return EFAILURE; |
---|
259 | } |
---|
260 | |
---|
261 | user = _ds_get_nextuser (CTX); |
---|
262 | while (user != NULL) |
---|
263 | { |
---|
264 | stat_user (user, totals); |
---|
265 | user = _ds_get_nextuser (CTX); |
---|
266 | } |
---|
267 | |
---|
268 | dspam_destroy (CTX); |
---|
269 | open_ctx = NULL; |
---|
270 | return 0; |
---|
271 | } |
---|
272 | |
---|
273 | int |
---|
274 | stat_user (const char *username, struct _ds_spam_totals *totals) |
---|
275 | { |
---|
276 | DSPAM_CTX *MTX = NULL; |
---|
277 | long total_spam, total_innocent, spam_misclassified, innocent_misclassified, spam_corpusfed, innocent_corpusfed, all_spam, all_innocent; |
---|
278 | char filename[MAX_FILENAME_LENGTH]; |
---|
279 | FILE *file; |
---|
280 | struct _ds_spam_totals *tptr; |
---|
281 | |
---|
282 | if (username) { |
---|
283 | MTX = dspam_create (username, NULL, _ds_read_attribute(agent_config, "Home"), DSM_CLASSIFY, 0); |
---|
284 | open_mtx = MTX; |
---|
285 | if (MTX == NULL) |
---|
286 | { |
---|
287 | fprintf (stderr, "Could not init context: %s\n", strerror (errno)); |
---|
288 | return EUNKNOWN; |
---|
289 | } |
---|
290 | set_libdspam_attributes(MTX); |
---|
291 | if (dspam_attach(MTX, NULL)) { |
---|
292 | LOG (LOG_WARNING, "unable to attach dspam context"); |
---|
293 | fprintf (stderr, "Unable to attach DSPAM context\n"); |
---|
294 | return EUNKNOWN; |
---|
295 | } |
---|
296 | tptr = &MTX->totals; |
---|
297 | } else { |
---|
298 | tptr = totals; |
---|
299 | } |
---|
300 | |
---|
301 | /* Convenience variables. Compiling with optimization will cause this to |
---|
302 | have 0 slowdown, as it is essentially dead code */ |
---|
303 | total_spam = |
---|
304 | MAX(0, (tptr->spam_learned + tptr->spam_classified) - |
---|
305 | (tptr->spam_misclassified + tptr->spam_corpusfed)); |
---|
306 | total_innocent = |
---|
307 | MAX(0, (tptr->innocent_learned + tptr->innocent_classified) - |
---|
308 | (tptr->innocent_misclassified + tptr->innocent_corpusfed)); |
---|
309 | spam_misclassified = tptr->spam_misclassified; |
---|
310 | innocent_misclassified = tptr->innocent_misclassified; |
---|
311 | spam_corpusfed = tptr->spam_corpusfed; |
---|
312 | innocent_corpusfed = tptr->innocent_corpusfed; |
---|
313 | |
---|
314 | if (MTX) { |
---|
315 | totals->spam_learned += MTX->totals.spam_learned; |
---|
316 | totals->innocent_learned += MTX->totals.innocent_learned; |
---|
317 | totals->spam_misclassified += MTX->totals.spam_misclassified; |
---|
318 | totals->innocent_misclassified += MTX->totals.innocent_misclassified; |
---|
319 | totals->spam_corpusfed += MTX->totals.spam_corpusfed; |
---|
320 | totals->innocent_corpusfed += MTX->totals.innocent_corpusfed; |
---|
321 | } |
---|
322 | |
---|
323 | /* Subtract the snapshot from the current totals to get stats "since last |
---|
324 | reset" for the user */ |
---|
325 | |
---|
326 | if (opt_snapshot && username) { |
---|
327 | long s_total_spam, s_total_innocent, s_spam_misclassified, |
---|
328 | s_innocent_misclassified, s_spam_corpusfed, s_innocent_corpusfed; |
---|
329 | |
---|
330 | _ds_userdir_path(filename, _ds_read_attribute(agent_config, "Home"), |
---|
331 | username, "rstats"); |
---|
332 | _ds_prepare_path_for (filename); |
---|
333 | |
---|
334 | file = fopen (filename, "r"); |
---|
335 | if (file != NULL) { |
---|
336 | if (fscanf(file, "%ld,%ld,%ld,%ld,%ld,%ld", |
---|
337 | &s_total_spam, |
---|
338 | &s_total_innocent, |
---|
339 | &s_spam_misclassified, |
---|
340 | &s_innocent_misclassified, |
---|
341 | &s_spam_corpusfed, |
---|
342 | &s_innocent_corpusfed)==6) { |
---|
343 | total_spam -= s_total_spam; |
---|
344 | total_innocent -= s_total_innocent; |
---|
345 | spam_misclassified -= s_spam_misclassified; |
---|
346 | innocent_misclassified -= s_innocent_misclassified; |
---|
347 | spam_corpusfed -= s_spam_corpusfed; |
---|
348 | innocent_corpusfed -= s_innocent_corpusfed; |
---|
349 | } |
---|
350 | fclose(file); |
---|
351 | } |
---|
352 | } |
---|
353 | |
---|
354 | all_spam = total_spam + spam_misclassified, |
---|
355 | all_innocent = total_innocent + innocent_misclassified; |
---|
356 | |
---|
357 | if (opt_humanfriendly) |
---|
358 | { |
---|
359 | printf("%s:\n\ |
---|
360 | \tTP True Positives: %6ld\n\ |
---|
361 | \tTN True Negatives: %6ld\n\ |
---|
362 | \tFP False Positives: %6ld\n\ |
---|
363 | \tFN False Negatives: %6ld\n\ |
---|
364 | \tSC Spam Corpusfed: %6ld\n\ |
---|
365 | \tNC Nonspam Corpusfed: %6ld\n\ |
---|
366 | \tTL Training Left: %6ld\n\ |
---|
367 | \tSHR Spam Hit Rate % 7.2f%%\n\ |
---|
368 | \tHSR Ham Strike Rate: % 7.2f%%\n\ |
---|
369 | \tPPV Positive predictive value: % 7.2f%%\n\ |
---|
370 | \tOCA Overall Accuracy: % 7.2f%%\n\ |
---|
371 | \n", |
---|
372 | (username) ? username : "TOTAL", |
---|
373 | total_spam, total_innocent, |
---|
374 | innocent_misclassified, spam_misclassified, |
---|
375 | spam_corpusfed, innocent_corpusfed, |
---|
376 | MAX(0, 2500 - (tptr->innocent_learned + |
---|
377 | tptr->innocent_classified)), |
---|
378 | (all_spam) ? |
---|
379 | (100.0-((float)spam_misclassified / (float)all_spam )*100.0) |
---|
380 | : 100.0, |
---|
381 | (all_innocent) ? |
---|
382 | 100-(100.0-((float)innocent_misclassified / (float)all_innocent )*100.0) |
---|
383 | : 100.0, |
---|
384 | (total_spam + innocent_misclassified) ? |
---|
385 | 100-(100.0-((float)total_spam / |
---|
386 | (float)(total_spam + innocent_misclassified))*100) |
---|
387 | : 100.0, |
---|
388 | (all_spam + all_innocent) ? |
---|
389 | (100.0-(((float)spam_misclassified +(float)innocent_misclassified) / |
---|
390 | (float)(all_spam + all_innocent))*100.0) |
---|
391 | : 100.0); |
---|
392 | } |
---|
393 | else |
---|
394 | { |
---|
395 | #ifdef LONG_USERNAMES |
---|
396 | printf ("%s\n TP:%6ld TN:%6ld FP:%6ld FN:%6ld SC:%6ld NC:%6ld\n", |
---|
397 | #else |
---|
398 | printf ("%-16s TP:%6ld TN:%6ld FP:%6ld FN:%6ld SC:%6ld NC:%6ld\n", |
---|
399 | #endif |
---|
400 | (username) ? username : "TOTAL", |
---|
401 | total_spam, total_innocent, |
---|
402 | innocent_misclassified, spam_misclassified, |
---|
403 | spam_corpusfed, innocent_corpusfed); |
---|
404 | |
---|
405 | if (opt_stats) |
---|
406 | printf ( |
---|
407 | #ifdef LONG_USERNAMES |
---|
408 | " " |
---|
409 | #else |
---|
410 | " " |
---|
411 | #endif |
---|
412 | "SHR: % 7.2f%% HSR: % 7.2f%% OCA: % 7.2f%%\n", |
---|
413 | (all_spam) ? |
---|
414 | (100.0-((float)spam_misclassified / (float)all_spam )*100.0) |
---|
415 | : 100.0, |
---|
416 | (all_innocent) ? |
---|
417 | 100.0- |
---|
418 | (100.0-((float)innocent_misclassified / (float)all_innocent )*100.0) |
---|
419 | : 0.0, |
---|
420 | (all_spam + all_innocent) ? |
---|
421 | (100.0-(((float)spam_misclassified +(float)innocent_misclassified) / |
---|
422 | (float)(all_spam + all_innocent))*100.0) |
---|
423 | : 100.0); |
---|
424 | } |
---|
425 | |
---|
426 | if (opt_reset && username) { |
---|
427 | _ds_userdir_path(filename, _ds_read_attribute(agent_config, "Home"), |
---|
428 | username, "rstats"); |
---|
429 | _ds_prepare_path_for (filename); |
---|
430 | file = fopen (filename, "w"); |
---|
431 | if (file == NULL) |
---|
432 | { |
---|
433 | LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror (errno)); |
---|
434 | if (MTX) |
---|
435 | dspam_destroy (MTX); |
---|
436 | open_mtx = NULL; |
---|
437 | return EFILE; |
---|
438 | } |
---|
439 | |
---|
440 | fprintf (file, "%ld,%ld,%ld,%ld,%ld,%ld\n", |
---|
441 | MAX(0,(tptr->spam_learned + tptr->spam_classified) - |
---|
442 | (tptr->spam_misclassified + tptr->spam_corpusfed)), |
---|
443 | MAX(0,(tptr->innocent_learned + tptr->innocent_classified) - |
---|
444 | (tptr->innocent_misclassified + tptr->innocent_corpusfed)), |
---|
445 | tptr->spam_misclassified, |
---|
446 | tptr->innocent_misclassified, |
---|
447 | tptr->spam_corpusfed, |
---|
448 | tptr->innocent_corpusfed); |
---|
449 | fclose(file); |
---|
450 | } |
---|
451 | |
---|
452 | if (MTX) |
---|
453 | dspam_destroy (MTX); |
---|
454 | open_mtx = NULL; |
---|
455 | return 0; |
---|
456 | } |
---|
457 | |
---|
458 | void |
---|
459 | dieout (int signal) |
---|
460 | { |
---|
461 | signal = signal; /* Keep compile happy */ |
---|
462 | fprintf (stderr, "terminated.\n"); |
---|
463 | if (open_ctx != NULL) |
---|
464 | dspam_destroy (open_ctx); |
---|
465 | if (open_mtx != NULL) |
---|
466 | dspam_destroy (open_mtx); |
---|
467 | _ds_destroy_config(agent_config); |
---|
468 | exit (EXIT_SUCCESS); |
---|
469 | } |
---|
470 | |
---|
471 | void |
---|
472 | usage (void) |
---|
473 | { |
---|
474 | (void)fprintf (stderr, |
---|
475 | "usage: dspam_stats [-h]|[--profile=PROFILE] [-HrsSt] [user [user...]]\n\ |
---|
476 | \tPrint dspam statistics for users.\n\ |
---|
477 | \tIf no users are specified, stats for all users are printed.\n\ |
---|
478 | \t-h: print this message\n\ |
---|
479 | \t-H: print stats in \"human friendly\" format\n\ |
---|
480 | \t-r: Resets the current snapshot\n\ |
---|
481 | \t-s: Displays stats since last snapshot (instead of since epoch)\n\ |
---|
482 | \t-S: Displays accuracy percentages in addition to stats\n\ |
---|
483 | \t-t: Displays a total of all statistics displayed\n"); |
---|
484 | _ds_destroy_config(agent_config); |
---|
485 | exit(EXIT_FAILURE); |
---|
486 | } |
---|