/* $Id: util.c,v 1.269 2011/06/28 00:13:48 sbajic Exp $ */ /* DSPAM COPYRIGHT (C) 2002-2012 DSPAM PROJECT This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #ifndef _WIN32 # include #endif #include #include #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include #ifdef TIME_WITH_SYS_TIME # include # include #else # ifdef HAVE_SYS_TIME_H # include # else # include # endif #endif #include "language.h" #include "config.h" #include "util.h" #include "libdspam.h" #ifdef _WIN32 #include #define mkdir(filename, perm) _mkdir(filename) #endif #ifndef HAVE_STRTOK_R char * strtok_r(char *s1, const char *s2, char **lasts) { char *ret; if (s1 == NULL) s1 = *lasts; while(*s1 && strchr(s2, *s1)) ++s1; if(*s1 == '\0') return NULL; ret = s1; while(*s1 && !strchr(s2, *s1)) ++s1; if(*s1) *s1++ = '\0'; *lasts = s1; return ret; } #endif /* HAVE_STRTOK_R */ double _ds_gettime(void) { double t; #ifdef _WIN32 t = GetTickCount()/1000.; #else /* !_WIN32 */ struct timeval tv; if (gettimeofday(&tv, NULL) != -1 ) t = tv.tv_usec/1000000.0 + tv.tv_sec; else t = 0.; #endif /* _WIN32/!_WIN32 */ return t; } /* Compliments of Jay Freeman */ #ifndef HAVE_STRSEP char * strsep (char **stringp, const char *delim) { char *ret = *stringp; if (ret == NULL) return (NULL); if ((*stringp = strpbrk (*stringp, delim)) != NULL) *((*stringp)++) = '\0'; return (ret); } #endif void chomp (char *string) { int len; if (string == NULL) return; len = strlen (string); if (len && string[len - 1] == 10) { string[len - 1] = 0; len--; } if (len && string[len - 1] == 13) string[len - 1] = 0; return; } char * ltrim (char *str) { char *p; if (!str || !str[0]) return str; for (p = str; isspace ((int) *p); ++p) { /* do nothing */ } if (p > str) strcpy (str, p); /* __STRCPY_CHECKED__ */ return str; } char * rtrim (char *str) { size_t offset; char *p; if (!str || !str[0]) return str; offset = strlen (str); p = str + offset - 1; /* now p points to the last character in * string */ for (; p >= str && isspace ((int) *p); --p) { *p = 0; } return str; } #ifndef HAVE_STRLCPY /* * Appends src to string dst of size siz (unlike strncat, siz is the * full size of dst, not space left). At most siz-1 characters * will be copied. Always NUL terminates (unless siz <= strlen(dst)). * Returns strlen(src) + MIN(siz, strlen(initial dst)). * If retval >= siz, truncation occurred. */ size_t strlcat (dst, src, siz) char *dst; const char *src; size_t siz; { register char *d = dst; register const char *s = src; register size_t n = siz; size_t dlen; /* Find the end of dst and adjust bytes left but don't go past end */ while (n-- != 0 && *d != '\0') d++; dlen = d - dst; n = siz - dlen; if (n == 0) return (dlen + strlen (s)); while (*s != '\0') { if (n != 1) { *d++ = *s; n--; } s++; } *d = '\0'; return (dlen + (s - src)); /* count does not include NUL */ } /* * Copy src to string dst of size siz. At most siz-1 characters * will be copied. Always NUL terminates (unless siz == 0). * Returns strlen(src); if retval >= siz, truncation occurred. */ size_t strlcpy (dst, src, siz) char *dst; const char *src; size_t siz; { register char *d = dst; register const char *s = src; register size_t n = siz; /* Copy as many bytes as will fit */ if (n != 0 && --n != 0) { do { if ((*d++ = *s++) == 0) break; } while (--n != 0); } /* Not enough room in dst, add NUL and traverse rest of src */ if (n == 0) { if (siz != 0) *d = '\0'; /* NUL-terminate dst */ while (*s++) ; } return (s - src - 1); /* count does not include NUL */ } #endif const char * _ds_userdir_path (char *path, const char *home, const char *filename, const char *extension) { char username[MAX_FILENAME_LENGTH]; char userpath[MAX_FILENAME_LENGTH]; #ifdef DOMAINSCALE char *f, *domain, *user; char *ptrptr; #endif #ifdef HOMEDIR struct passwd *p; #if defined(_REENTRANT) && defined(HAVE_GETPWNAM_R) struct passwd pwbuf; char buf[1024]; #endif char userhome[MAX_FILENAME_LENGTH]; #endif if (filename == NULL || filename[0] == 0) { path[0] = 0; return path; } #ifdef HOMEDIR #if defined(_REENTRANT) && defined(HAVE_GETPWNAM_R) if (getpwnam_r(filename, &pwbuf, buf, sizeof(buf), &p)) p = NULL; #else p = getpwnam(filename); #endif if (p == NULL) strcpy(userhome, home); else strlcpy(userhome, p->pw_dir, sizeof(userhome)); if (extension != NULL && (!strcmp (extension, "nodspam") || !strcmp (extension, "dspam"))) { if (p != NULL) { snprintf (path, MAX_FILENAME_LENGTH, "%s/.%s", p->pw_dir, extension); #ifdef DEBUG LOGDEBUG ("using %s as path", path); #endif return path; } } #endif /* HOMEDIR */ #ifdef DOMAINSCALE f = strdup(filename); user = strtok_r(f, "@", &ptrptr); domain = strtok_r(NULL, "@", &ptrptr); if (domain == NULL) domain = "local"; snprintf(userpath, MAX_FILENAME_LENGTH, "%s/%s", domain, user); strlcpy(username, user, MAX_FILENAME_LENGTH); free(f); #else strlcpy(username, filename, MAX_FILENAME_LENGTH); strcpy(userpath, username); #endif /* Use home/opt-in/ and home/opt-out/ to store opt files, instead of each user's directory */ if (extension != NULL && (!strcmp (extension, "nodspam") || !strcmp (extension, "dspam"))) { snprintf (path, MAX_FILENAME_LENGTH, "%s/opt-%s/%s.%s", home, (!strcmp(extension, "nodspam")) ? "out" : "in", userpath, extension); #ifdef DEBUG LOGDEBUG ("using %s as path", path); #endif return path; } #ifdef LARGESCALE if (filename[1] != 0) { if (extension == NULL) { snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%c/%s", home, filename[0], filename[1], filename); } else { if (extension[0] == 0) snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%c/%s/%s", home, filename[0], filename[1], filename, filename); else snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%c/%s/%s.%s", home, filename[0], filename[1], filename, filename, extension); } } else { if (extension == NULL) { snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%s", home, filename[0], filename); } else { if (extension[0] == 0) snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%s/%s", home, filename[0], filename, filename); else snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%c/%s/%s.%s", home, filename[0], filename, filename, extension); } } #else if (extension == NULL) { #ifdef HOMEDIR snprintf (path, MAX_FILENAME_LENGTH, "%s/.dspam", userhome); #else snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%s", home, userpath); #endif } else { #ifdef HOMEDIR snprintf(path, MAX_FILENAME_LENGTH, "%s/.dspam/%s.%s", userhome, username, extension); #else snprintf (path, MAX_FILENAME_LENGTH, "%s/data/%s/%s.%s", home, userpath, username, extension); #endif } #endif return path; } int _ds_prepare_path_for (const char *filename) { char path[MAX_FILENAME_LENGTH]; char *dir, *file; char *file_buffer_start; struct stat s; if (!filename) return EINVAL; file = strdup (filename); if (!file) { LOG (LOG_ERR, ERR_MEM_ALLOC); return EFAILURE; } #ifdef _WIN32 /* Windows uses both slash and backslash as path separators while the code below only deals with slashes -- make it work by adjusting the path. */ { char *p; for ( p = strchr(file, '\\'); p; p = strchr(p + 1, '\\') ) { *p = '/'; } } #endif file_buffer_start = file; path[0] = 0; dir = strsep (&file, "/"); while (dir != NULL) { strlcat (path, dir, sizeof (path)); dir = strsep (&file, "/"); #ifdef _WIN32 /* don't try to create root directory of a drive */ if ( path[2] != '\0' || path[1] != ':' ) #endif { #ifdef EXT_LOOKUP /* don't create users data dir if user verification is required */ if (dir != NULL && stat (path, &s) && path[0] != 0 && verified_user == 1) #else if (dir != NULL && stat (path, &s) && path[0] != 0) #endif { int x; LOGDEBUG ("creating directory '%s'", path); x = mkdir (path, 0770); if (x) { LOG(LOG_ERR, ERR_IO_DIR_CREATE, path, strerror (errno)); free (file_buffer_start); return EFILE; } } } strlcat (path, "/", sizeof (path)); } free (file_buffer_start); return 0; } int lc (char *buff, const char *string) { char *buffer; int i, j = 0; int len = strlen (string); buffer = malloc (len + 1); if (len == 0) { buff[0] = 0; free (buffer); return 0; } for (i = 0; i < len; i++) { if (isupper ((int) string[i])) { buffer[i] = tolower (string[i]); j++; } else { buffer[i] = string[i]; } } buffer[len] = 0; strcpy (buff, buffer); free (buffer); return j; } unsigned long long _ds_getcrc64 (const char *s) { static unsigned long long CRCTable[256]; unsigned long long crc = 0; static int init = 0; if (!init) { int i; init = 1; for (i = 0; i <= 255; i++) { int j; unsigned long long part = i; for (j = 0; j < 8; j++) { if (part & 1) part = (part >> 1) ^ POLY64REV; else part >>= 1; } CRCTable[i] = part; } } for (; *s; s++) { unsigned long long temp1 = crc >> 8; unsigned long long temp2 = CRCTable[(crc ^ (unsigned long long) *s) & 0xff]; crc = temp1 ^ temp2; } return crc; } int _ds_compute_weight(const char *token) { int complexity = _ds_compute_complexity(token); int sparse = _ds_compute_sparse(token); /* * Mathematically correct algorithm (but slower): * * int weight = 0; * * if (complexity >= 1 && complexity <= SPARSE_WINDOW_SIZE) { * weight = (int)pow(2.0,(2*(complexity-sparse-1))); * if (weight < 1) * return 1; * else * return weight; * } */ /* * The same (+/-) as above but without using an algorithm (and * therefore faster then calling each time the pow() function). * Using reverse order of complexity to speed up processing. * */ if (complexity == 5) { if (sparse == 1) /* the * brown fox jumped | the quick * fox jumped | the quick brown * jumped */ return 64; if (sparse == 2) /* the * * fox jumped | the * brown * jumped | the quick * * jumped */ return 16; if (sparse == 3) /* the * * * jumped */ return 4; if (sparse == 0) /* the quick brown fox jumped */ return 256; } if (complexity == 4) { if (sparse == 1) /* quick * fox jumped | quick brown * jumped */ return 16; if (sparse == 2) /* quick * * jumped */ return 4; if (sparse == 0) /* quick brown fox jumped */ return 64; } if (complexity == 3) { if (sparse == 1) /* brown * jumped */ return 4; if (sparse == 0) /* brown fox jumped */ return 16; } if (complexity == 2) { if (sparse == 0) /* fox jumped */ return 4; } if (complexity == 1) { if (sparse == 0) /* jumped */ return 1; } LOG(LOG_WARNING, "_ds_compute_weight: no rule to compute markovian weight for '%s'; complexity: %d; sparse: %d", token, complexity, sparse); return 1; } int _ds_compute_weight_osb(const char *token) { /* We have two possibilities here to compute the weight. * * One would be to use the original code found in older * CRM114 and compute (have larger weights for 'shorter' * (narrower) matches): * (SPARSE_WINDOW_SIZE-sparse)**(SPARSE_WINDOW_SIZE-sparse) * * Or use newer algorithm found in CRM114 (have larger * weights for 'longer' (wider) matches): * complexity**complexity * * We are going to use here the later one. */ int complexity = _ds_compute_complexity(token); /* * Mathematically correct algorithm (but slower): * * int weight = 1; * * if (complexity >= 1 && complexity <= SPARSE_WINDOW_SIZE) { * weight = (int)pow(complexity,complexity); * if (weight < 1) * weight = 1; * } * return weight; */ /* * The same (+/-) as above but without using an algorithm (and * therefore faster then calling each time the pow() function). * */ if (complexity == 5) { /* the * * * jumped */ return 3125; } if (complexity == 4) { /* quick * * jumped */ return 256; } if (complexity == 3) { /* brown * jumped */ return 27; } if (complexity == 2) { /* fox jumped */ return 4; } LOG(LOG_WARNING, "_ds_compute_weight_osb: no rule to compute OSB/OSBF/WINNOW weight for '%s'; complexity: %d", token, complexity); return 1; } int _ds_compute_sparse(const char *token) { int sparse = 0, i; if (!strncmp(token, "#+", 2)) sparse++; if (strlen(token)>=2 && !strncmp((token+strlen(token))-2, "+#", 2)) sparse++; for(i=0;token[i];i++) { if (!strncmp(token+i, "+#+", 3)) { sparse++; i++; } } return sparse; } int _ds_compute_complexity(const char *token) { int i, complexity = 1; if (token == NULL) return 1; for(i=0;token[i];i++) { if (token[i] == '+') { complexity++; i++; } } return complexity; } int _ds_extract_address(char *buf, const char *address, size_t len) { char *str = strdup(address); char *x, *y; if (str == NULL) return EUNKNOWN; x = strchr(str, '<'); if (!x) { free(str); return EFAILURE; } y = strchr(x, '>'); if (y) y[0] = 0; strlcpy(buf, x+1, len); free(str); return 0; } double chi2Q (double x, int v) { int i; double m, s, t; m = x / 2.0; s = exp(-m); t = s; for(i=1;i<(v/2);i++) { t *= m / i; s += t; } return MIN(s, 1.0); } void timeout(void) {} int _ds_get_fcntl_lock(int fd) { #ifdef _WIN32 return 0; #else struct flock f; int r; f.l_type = F_WRLCK; f.l_whence = SEEK_SET; f.l_start = 0; f.l_len = 0; #if defined __GLIBC__ && __GLIBC__ >= 2 signal(SIGALRM, (sighandler_t)timeout); #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) signal(SIGALRM, (sig_t)timeout); #else signal(SIGALRM, (void *)timeout); #endif alarm(300); r=fcntl(fd, F_SETLKW, &f); alarm(0); signal(SIGALRM,SIG_DFL); return r; #endif } int _ds_free_fcntl_lock(int fd) { #ifdef _WIN32 return 0; #else struct flock f; f.l_type = F_UNLCK; f.l_whence = SEEK_SET; f.l_start = 0; f.l_len = 0; return fcntl(fd, F_SETLKW, &f); #endif } int _ds_pow(int base, unsigned int exp) { int result = 1; while (exp > 0) { if (exp & 1) result *= base; base *= base; exp /= 2; } return result; } int _ds_pow2(int exp) { return _ds_pow(2, exp); } float _ds_round(float n) { int r = (n*100); while(r % 5) r++; return (r/100.0); } #ifndef HAVE_STRCASESTR char * strcasestr(s, find) const char *s, *find; { char c; if ((c = *find++) != 0) { size_t len; c = tolower((unsigned char)c); len = strlen(find); do { char sc; do { if ((sc = *s++) == 0) return (NULL); } while ((char)tolower((unsigned char)sc) != c); } while (strncasecmp(s, find, len) != 0); s--; } return ((char *)s); } #endif #ifndef HAVE_INET_NTOA_R /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ char * inet_ntoa_r(struct in_addr in, char *buf, int len) { char b[18]; register char *p; p = (char *)∈ #define UC(b) (((int)b)&0xff) snprintf(b, sizeof(b), "%d.%d.%d.%d", UC(p[0]), UC(p[1]), UC(p[2]), UC(p[3])); if ((size_t)len <= strlen(b)) { errno = ERANGE; return(NULL); } return strcpy(buf, b); } #endif /* * _ds_validate_address() - Validate a email address * * DESCRIPTION * validate the supplied email address * * INPUT ARGUMENTS * const char *address pointer to email address * * NOTES * This function is NOT RFC 821/822 complete. However... most invalid * RFC 821/822 email addresses should be caputred by this function. * Extend this function if you need to capture more invalid email addresses. * * RETURN VALUES * returns 1 if email address is valid * returns 0 if email address is not valid * */ int _ds_validate_address(const char *address) { int count = 0; const char *p; char *email; const char *domain; static char *rfc822_specials = "()<>@,;:\\\"[]"; /* remove < at the beginning and > at the end of email address */ email = ALLTRIM(strdup(address)); if (*email == '<' && *(email + strlen(email) - 1) == '>') { *(email + strlen(email) - 1) = 0; email++; } /* First validate the local part (local_part@domain_part.tld) */ if (*email == '.') return 0; for (p = email; *p; p++) { if (*p == '\"' && (p == email || *(p - 1) == '.' || *(p - 1) == '\"')) { while (*++p) { if (*p == '\"') break; if (*p == '\\' && (*++p == ' ')) continue; if (*p < ' ' || *p >= 127) return 0; } if (!*p++) return 0; if (*p == '@') break; if (*p != '.') return 0; continue; } if (*p == '@') break; if (*p == '.' && (*++p == '.')) return 0; if (*p <= ' ' || *p >= 127) return 0; if (strchr(rfc822_specials, *p)) return 0; } if (p == email || *(p - 1) == '.') return 0; /* Next validate the domain part (local_part@domain_part.tld) */ if (!*(domain = ++p)) return 0; do { if (*p == '.') { if (p == domain || *(p - 1) == '.') return 0; count++; } if (*p <= ' ' || *p >= 127) return 0; if (*p == '.' && (*++p == '.')) return 0; if (strchr(rfc822_specials, *p)) return 0; } while (*++p); return (count >= 1); }