/* $Id: cssclean.c,v 1.138 2011/06/28 00:13:48 sbajic Exp $ */ /* DSPAM COPYRIGHT (C) 2002-2012 DSPAM PROJECT This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ /* cssclean.c - rebuild a hash database, omitting hapaxes */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TIME_WITH_SYS_TIME # include # include #else # ifdef HAVE_SYS_TIME_H # include # else # include # endif #endif #define READ_ATTRIB(A) _ds_read_attribute(agent_config, A) #define MATCH_ATTRIB(A, B) _ds_match_attribute(agent_config, A, B) int DO_DEBUG #ifdef DEBUG = 1 #else = 0 #endif ; #include "read_config.h" #include "hash_drv.h" #include "error.h" #include "language.h" #define SYNTAX "syntax: cssclean [filename]" int cssclean(const char *filename, int heavy); int main(int argc, char *argv[]) { int r; int heavy=0; if (argc<2) { fprintf(stderr, "%s\n", SYNTAX); exit(EXIT_FAILURE); } if ( (argc>=3) && (!strcmp(argv[2], "heavy") ) )heavy=1; agent_config = read_config(NULL); if (!agent_config) { LOG(LOG_ERR, ERR_AGENT_READ_CONFIG); exit(EXIT_FAILURE); } r = cssclean(argv[1], heavy); if (r) { fprintf(stderr, "cssclean failed on error %d\n", r); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); } int cssclean(const char *filename, int heavy) { unsigned long i; hash_drv_header_t header; FILE* lockfile = NULL; void *offset; struct _hash_drv_map old, new; hash_drv_spam_record_t rec; unsigned long filepos; char *dir = NULL; char newfile[512]; struct stat st; unsigned long spam, nonspam, cntr; int drop, prb; char *filenamecopy; unsigned long hash_rec_max = HASH_REC_MAX; unsigned long max_seek = HASH_SEEK_MAX; unsigned long max_extents = 0; unsigned long extent_size = HASH_EXTENT_MAX; int pctincrease = 0; int flags = 0; int rc = EFAILURE; if (READ_ATTRIB("HashRecMax")) hash_rec_max = strtol(READ_ATTRIB("HashRecMax"), NULL, 0); if (READ_ATTRIB("HashExtentSize")) extent_size = strtol(READ_ATTRIB("HashExtentSize"), NULL, 0); if (READ_ATTRIB("HashMaxExtents")) max_extents = strtol(READ_ATTRIB("HashMaxExtents"), NULL, 0); if (READ_ATTRIB("HashPctIncrease")) { pctincrease = atoi(READ_ATTRIB("HashPctIncrease")); if (pctincrease > 100) { LOG(LOG_ERR, "HashPctIncrease out of range; ignoring"); pctincrease = 0; } } if (MATCH_ATTRIB("HashAutoExtend", "on")) flags = HMAP_AUTOEXTEND; if (READ_ATTRIB("HashMaxSeek")) max_seek = strtol(READ_ATTRIB("HashMaxSeek"), NULL, 0); if (stat(filename, &st) < 0) return EFAILURE; /* create a temporary file name */ dir = strdup(filename); if (dir == NULL) goto end; filenamecopy = strdup(filename); if (filenamecopy == NULL) goto end; snprintf(newfile, sizeof(newfile), "/%s/.dspam%u.css", dirname((char *) filenamecopy), (unsigned int) getpid()); lockfile = _hash_tools_lock_get (filename); if (lockfile == NULL) goto end; if (_hash_drv_open(filename, &old, 0, max_seek, max_extents, extent_size, pctincrease, flags)) goto end; if (_hash_drv_open(newfile, &new, hash_rec_max, max_seek, max_extents, extent_size, pctincrease, flags)) { _hash_drv_close(&old); goto end; } /* preserve counters */ memcpy(new.header, old.header, sizeof(*new.header)); if (fchown(new.fd, st.st_uid, st.st_gid) < 0) { _hash_drv_close(&new); _hash_drv_close(&old); unlink(newfile); goto end; } if (fchmod(new.fd, st.st_mode) < 0) { _hash_drv_close(&new); _hash_drv_close(&old); unlink(newfile); goto end; } filepos = sizeof(struct _hash_drv_header); header = old.addr; while(filepos < old.file_len) { for(i=0;ihash_rec_max;i++) { rec = (void *)((unsigned long) old.addr + filepos); nonspam = rec->nonspam & 0x0fffffff; spam = rec->spam & 0x0fffffff; cntr = ((rec->nonspam>>28) & 0x0f) | ((rec->spam>>24) & 0xf0); if(cntr<255)cntr++; rec->nonspam=nonspam|((cntr&0x0f)<<28); rec->spam=spam|((cntr&0xf0)<<24); if(nonspam+spam>0) prb=(abs(nonspam-spam)*1000)/(nonspam+spam); else prb=1000; drop=0; if(heavy) { if( (nonspam+spam<=1) || (prb<100) )drop=1; } else { if( ((nonspam*2+spam<5)&&(cntr>60)) || ((nonspam+spam<=1)&&(cntr>15)) || ((prb<200)&&(cntr>15)) || (cntr>120) ) drop=1; } if (rec->hashcode && !drop) { if (_hash_drv_set_spamrecord(&new, rec, 0)) { LOG(LOG_WARNING, "aborting on error"); _hash_drv_close(&new); _hash_drv_close(&old); unlink(newfile); goto end; } } filepos += sizeof(struct _hash_drv_spam_record); } offset = (void *)((unsigned long) old.addr + filepos); header = offset; filepos += sizeof(struct _hash_drv_header); } bcopy (old.header, new.header, sizeof(struct _hash_drv_header)); _hash_drv_close(&new); _hash_drv_close(&old); if (rename(newfile, filename) < 0) goto end; rc = 0; end: free(dir); _hash_tools_lock_free(filename, lockfile); return rc; }