source: npl/mailserver/dspam/dspam-3.10.2/src/tools.hash_drv/cssclean.c @ c5c522c

gcc484ntopperl-5.22
Last change on this file since c5c522c was c5c522c, checked in by Edwin Eefting <edwin@datux.nl>, 8 years ago

initial commit, transferred from cleaned syn3 svn tree

  • Property mode set to 100644
File size: 6.1 KB
Line 
1/* $Id: cssclean.c,v 1.138 2011/06/28 00:13:48 sbajic Exp $ */
2
3/*
4 DSPAM
5 COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Affero General Public License as
9 published by the Free Software Foundation, either version 3 of the
10 License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20*/
21
22/* cssclean.c - rebuild a hash database, omitting hapaxes */
23
24#ifdef HAVE_CONFIG_H
25#include <auto-config.h>
26#endif
27
28#include <string.h>
29#include <sys/types.h>
30#include <sys/mman.h>
31#include <sys/stat.h>
32#include <sys/uio.h>
33#include <dirent.h>
34#include <unistd.h>
35#include <errno.h>
36#include <stdlib.h>
37#include <stdio.h>
38#include <fcntl.h>
39#include <signal.h>
40#include <libgen.h>
41
42#ifdef TIME_WITH_SYS_TIME
43#   include <sys/time.h>
44#   include <time.h>
45#else
46#   ifdef HAVE_SYS_TIME_H
47#       include <sys/time.h>
48#   else
49#       include <time.h>
50#   endif
51#endif
52
/*
 * Shorthands for looking things up in the global agent_config:
 *   READ_ATTRIB(key)         expands to _ds_read_attribute(agent_config, key)
 *   MATCH_ATTRIB(key, value) expands to _ds_match_attribute(agent_config, key, value)
 */
#define READ_ATTRIB(A) \
  _ds_read_attribute(agent_config, (A))
#define MATCH_ATTRIB(A, B) \
  _ds_match_attribute(agent_config, (A), (B))
55
/* Global debug switch: 1 when the tool is compiled with DEBUG defined, 0 otherwise. */
#ifdef DEBUG
int DO_DEBUG = 1;
#else
int DO_DEBUG = 0;
#endif
63
64#include "read_config.h"
65#include "hash_drv.h"
66#include "error.h"
67#include "language.h"
68 
/*
 * Usage line printed by main() when no database file is given.
 * "heavy" is the optional second argument main() recognises; it selects the
 * more aggressive pruning rules.
 */
#define SYNTAX "syntax: cssclean [filename] [heavy]"

/*
 * Rebuild the hash database stored in `filename`, leaving out the records
 * selected by the pruning rules (`heavy` != 0 selects the aggressive set).
 * Returns 0 on success and a non-zero error code on failure.
 */
int cssclean(const char *filename, int heavy);
72
73int main(int argc, char *argv[]) {
74  int r;
75  int heavy=0;
76
77  if (argc<2) {
78    fprintf(stderr, "%s\n", SYNTAX);
79    exit(EXIT_FAILURE);
80  }
81
82  if ( (argc>=3) && (!strcmp(argv[2], "heavy") ) )heavy=1;
83
84  agent_config = read_config(NULL);
85  if (!agent_config) {
86    LOG(LOG_ERR, ERR_AGENT_READ_CONFIG);
87    exit(EXIT_FAILURE);
88  }
89
90  r = cssclean(argv[1], heavy);
91 
92  if (r) {
93    fprintf(stderr, "cssclean failed on error %d\n", r);
94    exit(EXIT_FAILURE);
95  }
96  exit(EXIT_SUCCESS);
97}
98
99int cssclean(const char *filename, int heavy) {
100  unsigned long i;
101  hash_drv_header_t header;
102  FILE* lockfile = NULL;
103  void *offset;
104  struct _hash_drv_map old, new;
105  hash_drv_spam_record_t rec;
106  unsigned long filepos;
107  char *dir = NULL;
108  char newfile[512];
109  struct stat st;
110  unsigned long spam, nonspam, cntr;
111  int drop, prb;
112  char *filenamecopy;
113  unsigned long hash_rec_max = HASH_REC_MAX;
114  unsigned long max_seek     = HASH_SEEK_MAX;
115  unsigned long max_extents  = 0;
116  unsigned long extent_size  = HASH_EXTENT_MAX;
117  int pctincrease = 0;
118  int flags = 0;
119  int rc = EFAILURE;
120
121  if (READ_ATTRIB("HashRecMax"))
122    hash_rec_max = strtol(READ_ATTRIB("HashRecMax"), NULL, 0);
123
124  if (READ_ATTRIB("HashExtentSize"))
125    extent_size = strtol(READ_ATTRIB("HashExtentSize"), NULL, 0);
126
127  if (READ_ATTRIB("HashMaxExtents"))
128    max_extents = strtol(READ_ATTRIB("HashMaxExtents"), NULL, 0);
129
130  if (READ_ATTRIB("HashPctIncrease")) {
131    pctincrease = atoi(READ_ATTRIB("HashPctIncrease"));
132    if (pctincrease > 100) {
133        LOG(LOG_ERR, "HashPctIncrease out of range; ignoring");
134        pctincrease = 0;
135    }
136  }
137
138  if (MATCH_ATTRIB("HashAutoExtend", "on"))
139    flags = HMAP_AUTOEXTEND;
140
141  if (READ_ATTRIB("HashMaxSeek"))
142     max_seek = strtol(READ_ATTRIB("HashMaxSeek"), NULL, 0);
143
144  if (stat(filename, &st) < 0)
145    return EFAILURE;
146
147  /* create a temporary file name */
148  dir = strdup(filename);
149  if (dir == NULL)
150    goto end;
151
152  filenamecopy = strdup(filename);
153  if (filenamecopy == NULL)
154    goto end;
155
156  snprintf(newfile, sizeof(newfile), "/%s/.dspam%u.css", dirname((char *) filenamecopy), (unsigned int) getpid());
157
158  lockfile = _hash_tools_lock_get (filename);
159  if (lockfile == NULL)
160    goto end;
161
162  if (_hash_drv_open(filename, &old, 0, max_seek,
163                     max_extents, extent_size, pctincrease, flags))
164    goto end;
165
166  if (_hash_drv_open(newfile, &new, hash_rec_max, max_seek,
167                     max_extents, extent_size, pctincrease, flags)) {
168    _hash_drv_close(&old);
169    goto end;
170  }
171
172  /* preserve counters */
173  memcpy(new.header, old.header, sizeof(*new.header));
174
175  if (fchown(new.fd, st.st_uid, st.st_gid) < 0) {
176    _hash_drv_close(&new);
177    _hash_drv_close(&old);
178    unlink(newfile);
179    goto end;
180  }
181
182  if (fchmod(new.fd, st.st_mode) < 0) {
183    _hash_drv_close(&new);
184    _hash_drv_close(&old);
185    unlink(newfile);
186    goto end;
187  }
188
189  filepos = sizeof(struct _hash_drv_header);
190  header = old.addr;
191  while(filepos < old.file_len) {
192    for(i=0;i<header->hash_rec_max;i++) {
193      rec = (void *)((unsigned long) old.addr + filepos);
194
195      nonspam = rec->nonspam & 0x0fffffff;
196      spam = rec->spam & 0x0fffffff;
197      cntr = ((rec->nonspam>>28) & 0x0f) |
198             ((rec->spam>>24) & 0xf0);
199
200      if(cntr<255)cntr++;
201      rec->nonspam=nonspam|((cntr&0x0f)<<28);
202      rec->spam=spam|((cntr&0xf0)<<24);
203
204      if(nonspam+spam>0)
205        prb=(abs(nonspam-spam)*1000)/(nonspam+spam);
206      else
207        prb=1000;
208
209      drop=0;
210
211      if(heavy) {
212        if( (nonspam+spam<=1) ||
213            (prb<100)
214          )drop=1;
215      }
216      else {
217        if( ((nonspam*2+spam<5)&&(cntr>60)) ||
218            ((nonspam+spam<=1)&&(cntr>15))  ||
219            ((prb<200)&&(cntr>15)) ||
220            (cntr>120)
221          ) drop=1;
222      }
223
224      if (rec->hashcode && !drop) {
225        if (_hash_drv_set_spamrecord(&new, rec, 0)) {
226          LOG(LOG_WARNING, "aborting on error");
227          _hash_drv_close(&new);
228          _hash_drv_close(&old);
229          unlink(newfile);
230          goto end;
231        }
232      }
233      filepos += sizeof(struct _hash_drv_spam_record);
234    }
235    offset = (void *)((unsigned long) old.addr + filepos);
236    header = offset;
237    filepos += sizeof(struct _hash_drv_header);
238  }
239
240  bcopy (old.header, new.header, sizeof(struct _hash_drv_header));
241  _hash_drv_close(&new);
242  _hash_drv_close(&old);
243  if (rename(newfile, filename) < 0)
244    goto end;
245  rc = 0;
246
247end:
248  free(dir);
249  _hash_tools_lock_free(filename, lockfile);
250  return rc;
251}
Note: See TracBrowser for help on using the repository browser.