1 | /* $Id: util.c,v 1.269 2011/06/28 00:13:48 sbajic Exp $ */ |
---|
2 | |
---|
3 | /* |
---|
4 | DSPAM |
---|
5 | COPYRIGHT (C) 2002-2012 DSPAM PROJECT |
---|
6 | |
---|
7 | This program is free software: you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU Affero General Public License as |
---|
9 | published by the Free Software Foundation, either version 3 of the |
---|
10 | License, or (at your option) any later version. |
---|
11 | |
---|
12 | This program is distributed in the hope that it will be useful, |
---|
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | GNU Affero General Public License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Affero General Public License |
---|
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
19 | |
---|
20 | */ |
---|
21 | |
---|
22 | #ifdef HAVE_CONFIG_H |
---|
23 | #include <auto-config.h> |
---|
24 | #endif |
---|
25 | |
---|
26 | #include <string.h> |
---|
27 | #include <stdlib.h> |
---|
28 | #include <ctype.h> |
---|
29 | #include <errno.h> |
---|
30 | #include <error.h> |
---|
31 | #ifndef _WIN32 |
---|
32 | # include <pwd.h> |
---|
33 | #endif |
---|
34 | #include <sys/types.h> |
---|
35 | #include <sys/stat.h> |
---|
36 | #ifdef HAVE_UNISTD_H |
---|
37 | # include <unistd.h> |
---|
38 | #endif |
---|
39 | #include <stdio.h> |
---|
40 | #include <math.h> |
---|
41 | #include <fcntl.h> |
---|
42 | #include <signal.h> |
---|
43 | |
---|
44 | #ifdef TIME_WITH_SYS_TIME |
---|
45 | # include <sys/time.h> |
---|
46 | # include <time.h> |
---|
47 | #else |
---|
48 | # ifdef HAVE_SYS_TIME_H |
---|
49 | # include <sys/time.h> |
---|
50 | # else |
---|
51 | # include <time.h> |
---|
52 | # endif |
---|
53 | #endif |
---|
54 | |
---|
55 | #include "language.h" |
---|
56 | #include "config.h" |
---|
57 | #include "util.h" |
---|
58 | #include "libdspam.h" |
---|
59 | |
---|
60 | #ifdef _WIN32 |
---|
61 | #include <direct.h> |
---|
62 | |
---|
63 | #define mkdir(filename, perm) _mkdir(filename) |
---|
64 | #endif |
---|
65 | |
---|
66 | #ifndef HAVE_STRTOK_R |
---|
/*
 * strtok_r() - reentrant tokenizer fallback for platforms lacking one.
 *
 * s1     string to tokenize, or NULL to continue from *lasts
 * s2     set of delimiter characters
 * lasts  caller-owned cursor; updated whenever a token is returned
 *
 * Returns a pointer to the next NUL-terminated token inside the buffer,
 * or NULL when only delimiters (or nothing) remain. On the NULL return
 * *lasts is left untouched.
 */
char *
strtok_r(char *s1, const char *s2, char **lasts)
{
  char *cursor = (s1 != NULL) ? s1 : *lasts;
  char *token;

  /* skip any leading delimiters */
  cursor += strspn(cursor, s2);
  if (*cursor == '\0')
    return NULL;

  /* the token runs up to the next delimiter or the end of the string */
  token = cursor;
  cursor += strcspn(cursor, s2);
  if (*cursor != '\0')
    *cursor++ = '\0';

  *lasts = cursor;
  return token;
}
---|
86 | #endif /* HAVE_STRTOK_R */ |
---|
87 | |
---|
/*
 * _ds_gettime() - current time as a floating point number of seconds.
 *
 * On _WIN32 this is GetTickCount() divided by 1000. Elsewhere it is the
 * gettimeofday() result (seconds plus fractional microseconds), or 0 if
 * gettimeofday() reports failure.
 */
double _ds_gettime(void)
{
#ifdef _WIN32
  return GetTickCount()/1000.;
#else /* !_WIN32 */
  struct timeval now;

  if (gettimeofday(&now, NULL) == -1)
    return 0.;

  return now.tv_usec/1000000.0 + now.tv_sec;
#endif /* _WIN32/!_WIN32 */
}
---|
104 | |
---|
105 | /* Compliments of Jay Freeman <saurik@saurik.com> */ |
---|
106 | |
---|
107 | #ifndef HAVE_STRSEP |
---|
/*
 * strsep() - fallback implementation.
 *
 * Splits *stringp at the first character found in delim. The delimiter
 * is overwritten with NUL and *stringp is advanced past it; when no
 * delimiter is left *stringp becomes NULL. Returns the start of the
 * token, or NULL if *stringp was already NULL.
 */
char *
strsep (char **stringp, const char *delim)
{
  char *token = *stringp;
  char *sep;

  if (token == NULL)
    return NULL;

  sep = strpbrk (token, delim);
  if (sep != NULL)
    *sep++ = '\0';

  *stringp = sep;
  return token;
}
---|
118 | #endif |
---|
119 | |
---|
/*
 * chomp() - strip one trailing line ending in place.
 *
 * Removes a single trailing LF (if present) and then a single trailing
 * CR (if present). A NULL string is ignored.
 */
void
chomp (char *string)
{
  char *tail;

  if (string == NULL)
    return;

  tail = string + strlen (string);

  if (tail > string && tail[-1] == '\n')
    *--tail = '\0';

  if (tail > string && tail[-1] == '\r')
    *--tail = '\0';
}
---|
136 | |
---|
/*
 * ltrim() - remove leading whitespace in place.
 *
 * The remaining text (including its terminator) is shifted to the start
 * of the buffer. NULL and empty strings are returned unchanged.
 *
 * Returns str.
 */
char *
ltrim (char *str)
{
  char *p;

  if (!str || !str[0])
    return str;

  /* <ctype.h> functions require a value representable as unsigned char */
  for (p = str; isspace ((unsigned char) *p); ++p)
  {
    /* do nothing */
  }

  /* source and destination overlap, so strcpy() must not be used here */
  if (p > str)
    memmove (str, p, strlen (p) + 1);

  return str;
}
---|
151 | |
---|
/*
 * rtrim() - remove trailing whitespace in place.
 *
 * Every trailing whitespace character is overwritten with NUL. NULL and
 * empty strings are returned unchanged.
 *
 * Returns str.
 */
char *
rtrim (char *str)
{
  size_t len;

  if (!str || !str[0])
    return str;

  /*
   * Walk back by index so no pointer before the start of the buffer is
   * ever formed, and hand isspace() an unsigned char value.
   */
  len = strlen (str);
  while (len > 0 && isspace ((unsigned char) str[len - 1]))
    str[--len] = '\0';

  return str;
}
---|
168 | |
---|
169 | #ifndef HAVE_STRLCPY |
---|
/*
 * strlcat() - size-bounded string concatenation (fallback implementation).
 *
 * Appends src to string dst of size siz (unlike strncat, siz is the
 * full size of dst, not space left). At most siz-1 characters
 * will be copied. Always NUL terminates (unless siz <= strlen(dst)).
 * Returns strlen(src) + MIN(siz, strlen(initial dst)).
 * If retval >= siz, truncation occurred.
 *
 * Defined with a full prototype so that calls are type-checked; the
 * former old-style (K&R) parameter list is obsolescent.
 */
size_t
strlcat (char *dst, const char *src, size_t siz)
{
  char *d = dst;
  const char *s = src;
  size_t n = siz;
  size_t dlen;

  /* Find the end of dst and adjust bytes left but don't go past end */
  while (n-- != 0 && *d != '\0')
    d++;
  dlen = (size_t) (d - dst);
  n = siz - dlen;

  /* dst is not terminated within siz bytes: nothing can be appended */
  if (n == 0)
    return (dlen + strlen (s));

  while (*s != '\0')
  {
    /* keep one byte in reserve for the terminator */
    if (n != 1)
    {
      *d++ = *s;
      n--;
    }
    s++;
  }
  *d = '\0';

  return (dlen + (size_t) (s - src));   /* count does not include NUL */
}
---|
209 | |
---|
/*
 * strlcpy() - size-bounded string copy (fallback implementation).
 *
 * Copy src to string dst of size siz. At most siz-1 characters
 * will be copied. Always NUL terminates (unless siz == 0).
 * Returns strlen(src); if retval >= siz, truncation occurred.
 *
 * Defined with a full prototype so that calls are type-checked; the
 * former old-style (K&R) parameter list is obsolescent.
 */
size_t
strlcpy (char *dst, const char *src, size_t siz)
{
  char *d = dst;
  const char *s = src;
  size_t n = siz;

  /* Copy as many bytes as will fit */
  if (n != 0 && --n != 0)
  {
    do
    {
      if ((*d++ = *s++) == 0)
        break;
    }
    while (--n != 0);
  }

  /* Not enough room in dst, add NUL and traverse rest of src */
  if (n == 0)
  {
    if (siz != 0)
      *d = '\0';                /* NUL-terminate dst */
    while (*s++)
      ;
  }

  return (size_t) (s - src - 1);        /* count does not include NUL */
}
---|
247 | #endif |
---|
248 | |
---|
249 | const char * _ds_userdir_path (char *path, const char *home, const char *filename, const char *extension) |
---|
250 | { |
---|
251 | char username[MAX_FILENAME_LENGTH]; |
---|
252 | char userpath[MAX_FILENAME_LENGTH]; |
---|
253 | #ifdef DOMAINSCALE |
---|
254 | char *f, *domain, *user; |
---|
255 | char *ptrptr; |
---|
256 | #endif |
---|
257 | #ifdef HOMEDIR |
---|
258 | struct passwd *p; |
---|
259 | #if defined(_REENTRANT) && defined(HAVE_GETPWNAM_R) |
---|
260 | struct passwd pwbuf; |
---|
261 | char buf[1024]; |
---|
262 | #endif |
---|
263 | char userhome[MAX_FILENAME_LENGTH]; |
---|
264 | #endif |
---|
265 | |
---|
266 | if (filename == NULL || filename[0] == 0) |
---|
267 | { |
---|
268 | path[0] = 0; |
---|
269 | return path; |
---|
270 | } |
---|
271 | |
---|
272 | #ifdef HOMEDIR |
---|
273 | #if defined(_REENTRANT) && defined(HAVE_GETPWNAM_R) |
---|
274 | if (getpwnam_r(filename, &pwbuf, buf, sizeof(buf), &p)) |
---|
275 | p = NULL; |
---|
276 | #else |
---|
277 | p = getpwnam(filename); |
---|
278 | #endif |
---|
279 | |
---|
280 | if (p == NULL) |
---|
281 | strcpy(userhome, home); |
---|
282 | else |
---|
283 | strlcpy(userhome, p->pw_dir, sizeof(userhome)); |
---|
284 | |
---|
285 | if (extension != NULL |
---|
286 | && (!strcmp (extension, "nodspam") || !strcmp (extension, "dspam"))) |
---|
287 | { |
---|
288 | if (p != NULL) { |
---|
289 | snprintf (path, MAX_FILENAME_LENGTH, "%s/.%s", p->pw_dir, extension); |
---|
290 | #ifdef DEBUG |
---|
291 | LOGDEBUG ("using %s as path", path); |
---|
292 | #endif |
---|
293 | return path; |
---|
294 | } |
---|
295 | } |
---|
296 | #endif /* HOMEDIR */ |
---|
297 | |
---|
298 | #ifdef DOMAINSCALE |
---|
299 | f = strdup(filename); |
---|
300 | user = strtok_r(f, "@", &ptrptr); |
---|
301 | domain = strtok_r(NULL, "@", &ptrptr); |
---|
302 | |
---|
303 | if (domain == NULL) |
---|
304 | domain = "local"; |
---|
305 | snprintf(userpath, MAX_FILENAME_LENGTH, "%s/%s", domain, user); |
---|
306 | strlcpy(username, user, MAX_FILENAME_LENGTH); |
---|
307 | free(f); |
---|
308 | #else |
---|
309 | strlcpy(username, filename, MAX_FILENAME_LENGTH); |
---|
310 | strcpy(userpath, username); |
---|
311 | #endif |
---|
312 | |
---|
313 | /* Use home/opt-in/ and home/opt-out/ to store opt files, instead of |
---|
314 | each user's directory */ |
---|
315 | |
---|
316 | if (extension != NULL |
---|
317 | && (!strcmp (extension, "nodspam") || !strcmp (extension, "dspam"))) |
---|
318 | { |
---|
319 | snprintf (path, MAX_FILENAME_LENGTH, "%s/opt-%s/%s.%s", home, |
---|
320 | (!strcmp(extension, "nodspam")) ? "out" : "in", userpath, extension); |
---|
321 | #ifdef DEBUG |
---|
322 | LOGDEBUG ("using %s as path", path); |
---|
323 | #endif |
---|
324 | return path; |
---|
325 | } |
---|
326 | |
---|
327 | #ifdef LARGESCALE |
---|
328 | if (filename[1] != 0) |
---|
329 | { |
---|
330 | if (extension == NULL) |
---|
331 | { |
---|
332 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%c/%s", |
---|
333 | home, filename[0], filename[1], filename); |
---|
334 | } |
---|
335 | else |
---|
336 | { |
---|
337 | if (extension[0] == 0) |
---|
338 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%c/%s/%s", |
---|
339 | home, filename[0], filename[1], filename, filename); |
---|
340 | else |
---|
341 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%c/%s/%s.%s", |
---|
342 | home, filename[0], filename[1], filename, filename, |
---|
343 | extension); |
---|
344 | } |
---|
345 | } |
---|
346 | else |
---|
347 | { |
---|
348 | if (extension == NULL) |
---|
349 | { |
---|
350 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%s", |
---|
351 | home, filename[0], filename); |
---|
352 | } |
---|
353 | else |
---|
354 | { |
---|
355 | if (extension[0] == 0) |
---|
356 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%s/%s", |
---|
357 | home, filename[0], filename, filename); |
---|
358 | else |
---|
359 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%s/%s.%s", |
---|
360 | home, filename[0], filename, filename, extension); |
---|
361 | } |
---|
362 | } |
---|
363 | #else |
---|
364 | if (extension == NULL) |
---|
365 | { |
---|
366 | #ifdef HOMEDIR |
---|
367 | snprintf (path, MAX_FILENAME_LENGTH, "%s/.dspam", userhome); |
---|
368 | #else |
---|
369 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%s", home, userpath); |
---|
370 | #endif |
---|
371 | } |
---|
372 | else |
---|
373 | { |
---|
374 | #ifdef HOMEDIR |
---|
375 | snprintf(path, MAX_FILENAME_LENGTH, "%s/.dspam/%s.%s", userhome, username, |
---|
376 | extension); |
---|
377 | #else |
---|
378 | snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%s/%s.%s", |
---|
379 | home, userpath, username, extension); |
---|
380 | #endif |
---|
381 | } |
---|
382 | #endif |
---|
383 | |
---|
384 | return path; |
---|
385 | } |
---|
386 | |
---|
387 | int |
---|
388 | _ds_prepare_path_for (const char *filename) |
---|
389 | { |
---|
390 | char path[MAX_FILENAME_LENGTH]; |
---|
391 | char *dir, *file; |
---|
392 | char *file_buffer_start; |
---|
393 | struct stat s; |
---|
394 | |
---|
395 | if (!filename) |
---|
396 | return EINVAL; |
---|
397 | |
---|
398 | file = strdup (filename); |
---|
399 | if (!file) { |
---|
400 | LOG (LOG_ERR, ERR_MEM_ALLOC); |
---|
401 | return EFAILURE; |
---|
402 | } |
---|
403 | |
---|
404 | #ifdef _WIN32 |
---|
405 | /* |
---|
406 | Windows uses both slash and backslash as path separators while the code |
---|
407 | below only deals with slashes -- make it work by adjusting the path. |
---|
408 | */ |
---|
409 | { |
---|
410 | char *p; |
---|
411 | for ( p = strchr(file, '\\'); p; p = strchr(p + 1, '\\') ) |
---|
412 | { |
---|
413 | *p = '/'; |
---|
414 | } |
---|
415 | } |
---|
416 | #endif |
---|
417 | |
---|
418 | file_buffer_start = file; |
---|
419 | path[0] = 0; |
---|
420 | |
---|
421 | dir = strsep (&file, "/"); |
---|
422 | while (dir != NULL) |
---|
423 | { |
---|
424 | strlcat (path, dir, sizeof (path)); |
---|
425 | dir = strsep (&file, "/"); |
---|
426 | |
---|
427 | #ifdef _WIN32 |
---|
428 | /* don't try to create root directory of a drive */ |
---|
429 | if ( path[2] != '\0' || path[1] != ':' ) |
---|
430 | #endif |
---|
431 | { |
---|
432 | #ifdef EXT_LOOKUP |
---|
433 | /* don't create users data dir if user verification is required */ |
---|
434 | if (dir != NULL && stat (path, &s) && path[0] != 0 && verified_user == 1) |
---|
435 | #else |
---|
436 | if (dir != NULL && stat (path, &s) && path[0] != 0) |
---|
437 | #endif |
---|
438 | { |
---|
439 | int x; |
---|
440 | LOGDEBUG ("creating directory '%s'", path); |
---|
441 | x = mkdir (path, 0770); |
---|
442 | if (x) |
---|
443 | { |
---|
444 | LOG(LOG_ERR, ERR_IO_DIR_CREATE, path, strerror (errno)); |
---|
445 | free (file_buffer_start); |
---|
446 | return EFILE; |
---|
447 | } |
---|
448 | } |
---|
449 | } |
---|
450 | |
---|
451 | strlcat (path, "/", sizeof (path)); |
---|
452 | } |
---|
453 | free (file_buffer_start); |
---|
454 | return 0; |
---|
455 | } |
---|
456 | |
---|
/*
 * lc() - copy a string, converting upper case characters to lower case.
 *
 * buff     destination; must have room for strlen(string) + 1 bytes.
 *          It may be the same buffer as string (or overlap it).
 * string   NUL-terminated source text
 *
 * Returns the number of characters that were converted.
 */
int
lc (char *buff, const char *string)
{
  size_t len = strlen (string);
  size_t i;
  int converted = 0;

  /*
   * Copy first (memmove tolerates overlap), then convert in place. No
   * temporary allocation is needed, so there is no allocation failure
   * to handle.
   */
  memmove (buff, string, len + 1);

  for (i = 0; i < len; i++)
  {
    /* <ctype.h> functions require a value representable as unsigned char */
    unsigned char c = (unsigned char) buff[i];

    if (isupper (c))
    {
      buff[i] = (char) tolower (c);
      converted++;
    }
  }

  return converted;
}
---|
492 | |
---|
493 | unsigned long long |
---|
494 | _ds_getcrc64 (const char *s) |
---|
495 | { |
---|
496 | static unsigned long long CRCTable[256]; |
---|
497 | unsigned long long crc = 0; |
---|
498 | static int init = 0; |
---|
499 | |
---|
500 | if (!init) |
---|
501 | { |
---|
502 | int i; |
---|
503 | init = 1; |
---|
504 | for (i = 0; i <= 255; i++) |
---|
505 | { |
---|
506 | int j; |
---|
507 | unsigned long long part = i; |
---|
508 | for (j = 0; j < 8; j++) |
---|
509 | { |
---|
510 | if (part & 1) |
---|
511 | part = (part >> 1) ^ POLY64REV; |
---|
512 | else |
---|
513 | part >>= 1; |
---|
514 | } |
---|
515 | CRCTable[i] = part; |
---|
516 | } |
---|
517 | } |
---|
518 | for (; *s; s++) |
---|
519 | { |
---|
520 | unsigned long long temp1 = crc >> 8; |
---|
521 | unsigned long long temp2 = |
---|
522 | CRCTable[(crc ^ (unsigned long long) *s) & 0xff]; |
---|
523 | crc = temp1 ^ temp2; |
---|
524 | } |
---|
525 | |
---|
526 | return crc; |
---|
527 | } |
---|
528 | |
---|
529 | int _ds_compute_weight(const char *token) { |
---|
530 | int complexity = _ds_compute_complexity(token); |
---|
531 | int sparse = _ds_compute_sparse(token); |
---|
532 | |
---|
533 | /* |
---|
534 | * Mathematically correct algorithm (but slower): |
---|
535 | * |
---|
536 | * int weight = 0; |
---|
537 | * |
---|
538 | * if (complexity >= 1 && complexity <= SPARSE_WINDOW_SIZE) { |
---|
539 | * weight = (int)pow(2.0,(2*(complexity-sparse-1))); |
---|
540 | * if (weight < 1) |
---|
541 | * return 1; |
---|
542 | * else |
---|
543 | * return weight; |
---|
544 | * } |
---|
545 | */ |
---|
546 | |
---|
547 | |
---|
548 | /* |
---|
549 | * The same (+/-) as above but without using an algorithm (and |
---|
550 | * therefore faster then calling each time the pow() function). |
---|
551 | * Using reverse order of complexity to speed up processing. |
---|
552 | * |
---|
553 | */ |
---|
554 | |
---|
555 | if (complexity == 5) { |
---|
556 | if (sparse == 1) /* the * brown fox jumped | the quick * fox jumped | the quick brown * jumped */ |
---|
557 | return 64; |
---|
558 | if (sparse == 2) /* the * * fox jumped | the * brown * jumped | the quick * * jumped */ |
---|
559 | return 16; |
---|
560 | if (sparse == 3) /* the * * * jumped */ |
---|
561 | return 4; |
---|
562 | if (sparse == 0) /* the quick brown fox jumped */ |
---|
563 | return 256; |
---|
564 | } |
---|
565 | |
---|
566 | if (complexity == 4) { |
---|
567 | if (sparse == 1) /* quick * fox jumped | quick brown * jumped */ |
---|
568 | return 16; |
---|
569 | if (sparse == 2) /* quick * * jumped */ |
---|
570 | return 4; |
---|
571 | if (sparse == 0) /* quick brown fox jumped */ |
---|
572 | return 64; |
---|
573 | } |
---|
574 | |
---|
575 | if (complexity == 3) { |
---|
576 | if (sparse == 1) /* brown * jumped */ |
---|
577 | return 4; |
---|
578 | if (sparse == 0) /* brown fox jumped */ |
---|
579 | return 16; |
---|
580 | } |
---|
581 | |
---|
582 | if (complexity == 2) { |
---|
583 | if (sparse == 0) /* fox jumped */ |
---|
584 | return 4; |
---|
585 | } |
---|
586 | |
---|
587 | if (complexity == 1) { |
---|
588 | if (sparse == 0) /* jumped */ |
---|
589 | return 1; |
---|
590 | } |
---|
591 | |
---|
592 | LOG(LOG_WARNING, "_ds_compute_weight: no rule to compute markovian weight for '%s'; complexity: %d; sparse: %d", token, complexity, sparse); |
---|
593 | return 1; |
---|
594 | } |
---|
595 | int _ds_compute_weight_osb(const char *token) { |
---|
596 | /* We have two possibilities here to compute the weight. |
---|
597 | * |
---|
598 | * One would be to use the original code found in older |
---|
599 | * CRM114 and compute (have larger weights for 'shorter' |
---|
600 | * (narrower) matches): |
---|
601 | * (SPARSE_WINDOW_SIZE-sparse)**(SPARSE_WINDOW_SIZE-sparse) |
---|
602 | * |
---|
603 | * Or use newer algorithm found in CRM114 (have larger |
---|
604 | * weights for 'longer' (wider) matches): |
---|
605 | * complexity**complexity |
---|
606 | * |
---|
607 | * We are going to use here the later one. |
---|
608 | */ |
---|
609 | |
---|
610 | int complexity = _ds_compute_complexity(token); |
---|
611 | |
---|
612 | /* |
---|
613 | * Mathematically correct algorithm (but slower): |
---|
614 | * |
---|
615 | * int weight = 1; |
---|
616 | * |
---|
617 | * if (complexity >= 1 && complexity <= SPARSE_WINDOW_SIZE) { |
---|
618 | * weight = (int)pow(complexity,complexity); |
---|
619 | * if (weight < 1) |
---|
620 | * weight = 1; |
---|
621 | * } |
---|
622 | * return weight; |
---|
623 | */ |
---|
624 | |
---|
625 | /* |
---|
626 | * The same (+/-) as above but without using an algorithm (and |
---|
627 | * therefore faster then calling each time the pow() function). |
---|
628 | * |
---|
629 | */ |
---|
630 | |
---|
631 | if (complexity == 5) { /* the * * * jumped */ |
---|
632 | return 3125; |
---|
633 | } |
---|
634 | |
---|
635 | if (complexity == 4) { /* quick * * jumped */ |
---|
636 | return 256; |
---|
637 | } |
---|
638 | |
---|
639 | if (complexity == 3) { /* brown * jumped */ |
---|
640 | return 27; |
---|
641 | } |
---|
642 | |
---|
643 | if (complexity == 2) { /* fox jumped */ |
---|
644 | return 4; |
---|
645 | } |
---|
646 | |
---|
647 | LOG(LOG_WARNING, "_ds_compute_weight_osb: no rule to compute OSB/OSBF/WINNOW weight for '%s'; complexity: %d", token, complexity); |
---|
648 | return 1; |
---|
649 | } |
---|
650 | |
---|
/*
 * _ds_compute_sparse() - count the skipped-word markers in a token.
 *
 * A marker is counted for a leading "#+", for a trailing "+#" and for
 * every "+#+" inside the token.
 *
 * Returns the number of markers; 0 for a NULL token (matching the NULL
 * tolerance of _ds_compute_complexity).
 */
int _ds_compute_sparse(const char *token) {
  int sparse = 0;
  size_t len, i;

  if (token == NULL)
    return 0;

  len = strlen(token);

  if (!strncmp(token, "#+", 2))
    sparse++;
  if (len >= 2 && !strncmp(token + len - 2, "+#", 2))
    sparse++;

  for (i = 0; i < len; i++) {
    if (!strncmp(token + i, "+#+", 3)) {
      sparse++;
      /* step over the '#'; the closing '+' may open the next marker */
      i++;
    }
  }

  return sparse;
}
---|
667 | |
---|
/*
 * _ds_compute_complexity() - count the parts of a '+' joined token.
 *
 * Starts at 1 and adds one for every '+' found; the character directly
 * after a counted '+' is skipped and never treated as a separator.
 *
 * Returns the complexity; 1 for a NULL token.
 */
int _ds_compute_complexity(const char *token) {
  int complexity = 1;
  size_t i;

  if (token == NULL)
    return 1;

  for (i = 0; token[i] != '\0'; i++) {
    if (token[i] == '+') {
      complexity++;
      /* a trailing '+' has nothing to skip; stepping on would run past
         the terminator */
      if (token[i + 1] == '\0')
        break;
      i++;
    }
  }

  return complexity;
}
---|
683 | |
---|
684 | int _ds_extract_address(char *buf, const char *address, size_t len) { |
---|
685 | char *str = strdup(address); |
---|
686 | char *x, *y; |
---|
687 | |
---|
688 | if (str == NULL) |
---|
689 | return EUNKNOWN; |
---|
690 | |
---|
691 | x = strchr(str, '<'); |
---|
692 | if (!x) { |
---|
693 | free(str); |
---|
694 | return EFAILURE; |
---|
695 | } |
---|
696 | |
---|
697 | y = strchr(x, '>'); |
---|
698 | if (y) y[0] = 0; |
---|
699 | |
---|
700 | strlcpy(buf, x+1, len); |
---|
701 | free(str); |
---|
702 | return 0; |
---|
703 | } |
---|
704 | |
---|
/*
 * chi2Q() - evaluate the series
 *
 *   exp(-x/2) * sum_{i=0}^{v/2-1} (x/2)^i / i!
 *
 * x   chi-square value
 * v   degrees of freedom; v/2 uses integer division
 *
 * Returns the sum, capped at 1.0 via MIN().
 */
double chi2Q (double x, int v)
{
  const double half_x = x / 2.0;
  const int terms = v / 2;
  double term = exp(-half_x);
  double sum = term;
  int k;

  /* each term is the previous one multiplied by (x/2)/k */
  for (k = 1; k < terms; k++) {
    term *= half_x / k;
    sum += term;
  }

  return MIN(sum, 1.0);
}
---|
721 | |
---|
/*
 * timeout() - empty function; _ds_get_fcntl_lock() installs it (through
 * a cast) as the SIGALRM handler while it waits for a lock.
 */
void
timeout (void)
{
  /* intentionally empty */
}
---|
723 | |
---|
724 | int _ds_get_fcntl_lock(int fd) { |
---|
725 | #ifdef _WIN32 |
---|
726 | return 0; |
---|
727 | #else |
---|
728 | struct flock f; |
---|
729 | int r; |
---|
730 | |
---|
731 | f.l_type = F_WRLCK; |
---|
732 | f.l_whence = SEEK_SET; |
---|
733 | f.l_start = 0; |
---|
734 | f.l_len = 0; |
---|
735 | |
---|
736 | #if defined __GLIBC__ && __GLIBC__ >= 2 |
---|
737 | signal(SIGALRM, (sighandler_t)timeout); |
---|
738 | #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) |
---|
739 | signal(SIGALRM, (sig_t)timeout); |
---|
740 | #else |
---|
741 | signal(SIGALRM, (void *)timeout); |
---|
742 | #endif |
---|
743 | alarm(300); |
---|
744 | r=fcntl(fd, F_SETLKW, &f); |
---|
745 | alarm(0); |
---|
746 | signal(SIGALRM,SIG_DFL); |
---|
747 | |
---|
748 | return r; |
---|
749 | #endif |
---|
750 | } |
---|
751 | |
---|
/*
 * _ds_free_fcntl_lock() - release a lock on a file descriptor.
 *
 * Issues an F_UNLCK request covering the whole file with F_SETLKW.
 *
 * On _WIN32 nothing is done and 0 is returned. Otherwise the fcntl()
 * result is returned.
 */
int _ds_free_fcntl_lock(int fd) {
#ifdef _WIN32
  return 0;
#else
  struct flock unlock;

  /* l_start 0 with l_len 0 from SEEK_SET: the whole file */
  unlock.l_whence = SEEK_SET;
  unlock.l_start  = 0;
  unlock.l_len    = 0;
  unlock.l_type   = F_UNLCK;

  return fcntl(fd, F_SETLKW, &unlock);
#endif
}
---|
766 | |
---|
/*
 * _ds_pow() - integer power by repeated squaring.
 *
 * base   value to raise
 * exp    non-negative exponent
 *
 * Returns base raised to exp (1 when exp is 0). The caller has to make
 * sure the result fits in an int.
 */
int _ds_pow(int base, unsigned int exp) {
  int result = 1;

  while (exp > 0) {
    if (exp & 1u)
      result *= base;
    exp >>= 1;
    /*
     * Only square while another round will use it: squaring once more
     * after the last bit can overflow although the result itself fits.
     */
    if (exp > 0)
      base *= base;
  }

  return result;
}
---|
777 | |
---|
/*
 * _ds_pow2() - integer power of two.
 *
 * Returns 2 raised to exp via _ds_pow(). A negative exponent yields 0
 * (the fraction truncated to an integer); it is not forwarded because
 * it would convert to a very large unsigned exponent.
 */
int _ds_pow2(int exp) {
  if (exp < 0)
    return 0;

  return _ds_pow(2, (unsigned int) exp);
}
---|
781 | |
---|
/*
 * _ds_round() - round a value up to the next multiple of 0.05.
 *
 * n is scaled by 100 and truncated to an integer, which is then raised
 * to the next multiple of 5 (unless it already is one) and scaled back.
 */
float _ds_round(float n) {
  int hundredths = (n*100);
  int rem = hundredths % 5;

  if (rem > 0)
    hundredths += 5 - rem;      /* positive: step up to the next multiple */
  else if (rem < 0)
    hundredths -= rem;          /* negative: stepping up moves toward zero */

  return (hundredths/100.0);
}
---|
788 | |
---|
789 | #ifndef HAVE_STRCASESTR |
---|
/*
 * strcasestr() - case-insensitive substring search (fallback).
 *
 * s      text to search
 * find   text to look for
 *
 * Returns a pointer to the first occurrence of find in s, ignoring case,
 * s itself when find is empty, or NULL when there is no match.
 *
 * Defined with a full prototype so that calls are type-checked; the
 * former old-style (K&R) parameter list is obsolescent.
 */
char *
strcasestr(const char *s, const char *find)
{
  char c;

  if ((c = *find++) != 0) {
    size_t len;

    c = (char)tolower((unsigned char)c);
    len = strlen(find);         /* length of find after its first character */
    do {
      char sc;

      /* advance to the next character matching the first one of find */
      do {
        if ((sc = *s++) == 0)
          return (NULL);
      } while ((char)tolower((unsigned char)sc) != c);
    } while (strncasecmp(s, find, len) != 0);
    s--;                        /* back onto the first matched character */
  }
  return ((char *)s);
}
---|
811 | #endif |
---|
812 | |
---|
813 | #ifndef HAVE_INET_NTOA_R |
---|
814 | /* |
---|
815 | * Copyright (c) 1983, 1993 |
---|
816 | * The Regents of the University of California. All rights reserved. |
---|
817 | * |
---|
818 | * Redistribution and use in source and binary forms, with or without |
---|
819 | * modification, are permitted provided that the following conditions |
---|
820 | * are met: |
---|
821 | * 1. Redistributions of source code must retain the above copyright |
---|
822 | * notice, this list of conditions and the following disclaimer. |
---|
823 | * 2. Redistributions in binary form must reproduce the above copyright |
---|
824 | * notice, this list of conditions and the following disclaimer in the |
---|
825 | * documentation and/or other materials provided with the distribution. |
---|
826 | * 3. All advertising materials mentioning features or use of this software |
---|
827 | * must display the following acknowledgement: |
---|
828 | * This product includes software developed by the University of |
---|
829 | * California, Berkeley and its contributors. |
---|
830 | * 4. Neither the name of the University nor the names of its contributors |
---|
831 | * may be used to endorse or promote products derived from this software |
---|
832 | * without specific prior written permission. |
---|
833 | * |
---|
834 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
---|
835 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
836 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
837 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
---|
838 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
---|
839 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
---|
840 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
---|
841 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
---|
842 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
---|
843 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
---|
844 | * SUCH DAMAGE. |
---|
845 | */ |
---|
846 | |
---|
/*
 * inet_ntoa_r() - format an IPv4 address as dotted decimal text.
 *
 * in    address; its first four bytes are printed in memory order
 * buf   destination buffer
 * len   size of buf in bytes
 *
 * Returns buf on success. If len is not positive or too small for the
 * text plus its terminator, errno is set to ERANGE and NULL is returned.
 */
char *
inet_ntoa_r(struct in_addr in, char *buf, int len)
{
  char b[18];
  const unsigned char *p = (const unsigned char *)&in;

  snprintf(b, sizeof(b),
           "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);

  /* reject negative sizes before the conversion to size_t hides them */
  if (len <= 0 || (size_t)len <= strlen(b)) {
    errno = ERANGE;
    return(NULL);
  }

  return strcpy(buf, b);
}
---|
865 | #endif |
---|
866 | |
---|
/*
 * ds_validate_bare_address() - check "local_part@domain_part.tld".
 *
 * email must not carry surrounding whitespace or angle brackets.
 * Returns 1 if the address passes the checks, 0 otherwise.
 */
static int
ds_validate_bare_address (const char *email)
{
  static const char rfc822_specials[] = "()<>@,;:\\\"[]";
  const char *p;
  const char *domain;
  int count = 0;

  /* First validate the local part (local_part@domain_part.tld) */
  if (*email == '.') return 0;
  for (p = email; *p; p++) {
    if (*p == '\"' && (p == email || *(p - 1) == '.' || *(p - 1) == '\"')) {
      /* quoted section: scan up to the closing quote */
      while (*++p) {
        if (*p == '\"') break;
        if (*p == '\\' && (*++p == ' ')) continue;
        if (*p < ' ' || *p >= 127) return 0;
      }
      if (!*p++) return 0;
      if (*p == '@') break;
      if (*p != '.') return 0;
      continue;
    }
    if (*p == '@') break;
    if (*p == '.' && (*++p == '.')) return 0;
    if (*p <= ' ' || *p >= 127) return 0;
    if (strchr(rfc822_specials, *p)) return 0;
  }

  /* no '@' at all: stop here instead of stepping past the terminator */
  if (*p != '@') return 0;
  if (p == email || *(p - 1) == '.') return 0;

  /* Next validate the domain part (local_part@domain_part.tld) */
  domain = ++p;
  if (!*domain) return 0;
  for (; *p; p++) {
    if (*p == '.') {
      /* no leading dot and no two dots in a row */
      if (p == domain || *(p - 1) == '.') return 0;
      count++;
    }
    if (*p <= ' ' || *p >= 127) return 0;
    if (strchr(rfc822_specials, *p)) return 0;
  }
  /* no trailing dot */
  if (*(p - 1) == '.') return 0;

  return (count >= 1);
}

/*
 * _ds_validate_address() - Validate an email address
 *
 * DESCRIPTION
 *   validate the supplied email address
 *
 * INPUT ARGUMENTS
 *   const char *address    pointer to email address
 *
 * NOTES
 *   This function is NOT RFC 821/822 complete. However... most invalid
 *   RFC 821/822 email addresses should be captured by this function.
 *   Extend this function if you need to capture more invalid email addresses.
 *
 *   The address is checked on a private copy, which is released before
 *   returning. The copy is trimmed with ALLTRIM (defined outside this
 *   function; presumably it trims both ends in place -- verify) and one
 *   pair of enclosing angle brackets is removed.
 *
 * RETURN VALUES
 *   returns 1 if email address is valid
 *   returns 0 if email address is not valid, is NULL, or cannot be copied
 *
 */
int _ds_validate_address(const char *address) {
  char *copy;
  char *email;
  size_t len;
  int valid;

  if (address == NULL)
    return 0;

  copy = strdup(address);
  if (copy == NULL)
    return 0;

  /* remove < at the beginning and > at the end of email address */
  email = ALLTRIM(copy);
  len = strlen(email);
  if (len >= 2 && email[0] == '<' && email[len - 1] == '>') {
    email[len - 1] = 0;
    email++;
  }

  valid = ds_validate_bare_address(email);

  /* email may have been advanced; release the original allocation */
  free(copy);
  return valid;
}
---|