source: npl/mailserver/dspam/dspam-3.10.2/src/diction.c

Last change on this file was c5c522c, checked in by Edwin Eefting <edwin@datux.nl>, 8 years ago

initial commit, transferred from cleaned syn3 svn tree

  • Property mode set to 100644
File size: 7.0 KB
Line 
1/* $Id: diction.c,v 1.16 2011/06/28 00:13:48 sbajic Exp $ */
2
3/*
4 DSPAM
5 COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Affero General Public License as
9 published by the Free Software Foundation, either version 3 of the
10 License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20*/
21
22/*
23 *  diction.c - subset of lexical data
24 *
25 *  DESCRIPTION
26 *    a diction is a subset of lexical data from a user's dictionary. in the
27 *    context used within DSPAM, a diction is all of the matching lexical
28 *    information from the current message being processed. the diction is
29 *    loaded/stored by the storage driver and managed primarily by libdspam.
30 */
31
32#ifdef HAVE_CONFIG_H
33#include <auto-config.h>
34#endif
35
36#include <stdlib.h>
37#include <stdio.h>
38#include <string.h>
39
40#include "diction.h"
41
/*
 * Ascending table of primes used as hash-table bucket counts.
 * ds_diction_create() picks the first entry that is not smaller than the
 * requested size. The table is read-only, hence const.
 */
static const unsigned long _ds_prime_list[] = {
  53ul, 97ul, 193ul, 389ul, 769ul,
  1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
  49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
  1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
  50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
  1610612741ul, 3221225473ul, 4294967291ul
};
50
51ds_diction_t
52ds_diction_create (unsigned long size)
53{
54  ds_diction_t diction = (ds_diction_t) calloc(1, sizeof(struct _ds_diction));
55  int i = 0;
56
57  if (!diction) {
58    perror("ds_diction_create: calloc() failed");
59    return NULL;
60  }
61
62  while (_ds_prime_list[i] < size)
63    { i++; }
64
65  diction->size = _ds_prime_list[i];
66  diction->items = 0;
67  diction->tbl =
68    (struct _ds_term **) calloc(diction->size, sizeof (struct _ds_term *));
69  if (!diction->tbl)
70  {
71    perror("ds_diction_create: calloc() failed");
72    free(diction);
73    return NULL;
74  }
75
76  diction->order = nt_create(NT_INDEX);
77  diction->chained_order = nt_create(NT_INDEX);
78  if (!diction->order || !diction->chained_order) {
79    nt_destroy(diction->order);
80    nt_destroy(diction->chained_order);
81    free(diction->tbl);
82    free(diction);
83    return NULL;
84  }
85
86  return diction;
87}
88
89void
90ds_diction_destroy (ds_diction_t diction)
91{
92  ds_term_t term, next;
93  ds_cursor_t cur;
94
95  if (!diction) return;
96
97  cur = ds_diction_cursor(diction);
98  if (!cur) {
99    perror("ds_diction_destroy: ds_diction_cursor() failed");
100    return;
101  }
102
103  term = ds_diction_next(cur);
104  while(term)
105  {
106    next = ds_diction_next(cur);
107    ds_diction_delete(diction, term->key);
108    term = next;
109  }
110  ds_diction_close(cur);
111
112  nt_destroy(diction->order);
113  nt_destroy(diction->chained_order);
114  free(diction->tbl);
115  free(diction);
116  return;
117}
118
119ds_term_t
120ds_diction_term_create (ds_key_t key, const char *name)
121{
122  ds_term_t term = (ds_term_t) calloc(1, sizeof(struct _ds_term));
123
124  if (!term) {
125    perror("ds_diction_term_create: calloc() failed");
126  } else {
127    term->key = key;
128    term->frequency = 1;
129    term->type = 'D';
130    if (name)
131      term->name = strdup(name);
132  }
133  return term;
134}
135
136ds_term_t
137ds_diction_find (ds_diction_t diction, ds_key_t key)
138{
139  ds_term_t term;
140
141  term = diction->tbl[key % diction->size];
142  while (term)
143  {
144    if (key == term->key)
145      return term;
146    term = term->next;
147  }
148
149  return NULL;
150}
151
152ds_term_t
153ds_diction_touch(
154  ds_diction_t diction,
155  ds_key_t key,
156  const char *name,
157  int flags)
158{
159  unsigned long bucket = key % diction->size;
160  ds_term_t parent = NULL;
161  ds_term_t insert = NULL;
162  ds_term_t term;
163
164  term = diction->tbl[bucket];
165  while (term) {
166    if (key == term->key) {
167      insert = term;
168      break;
169    }
170    parent = term;
171    term = term->next;
172  }
173
174  if (!insert) {
175    insert = ds_diction_term_create(key, name);
176    if (!insert) {
177      perror("ds_diction_touch: ds_diction_term_create() failed");
178      return NULL;
179    }
180    diction->items++;
181    if (parent)
182      parent->next = insert;
183    else
184      diction->tbl[bucket] = insert;
185  } else {
186    if (!insert->name && name)
187      insert->name = strdup(name);
188    insert->frequency++;
189  }
190
191  if (flags & DSD_CONTEXT) {
192    if (flags & DSD_CHAINED)
193      nt_add(diction->chained_order, insert);
194    else
195      nt_add(diction->order, insert);
196  }
197
198  return insert;
199}
200
201void
202ds_diction_delete(ds_diction_t diction, ds_key_t key)
203{
204  unsigned long bucket = key % diction->size;
205  ds_term_t parent = NULL;
206  ds_term_t delete = NULL;
207  ds_term_t term;
208
209  term = diction->tbl[bucket];
210
211  while(term) {
212    if (key == term->key) {
213      delete = term;
214      break;
215    }
216    parent = term;
217    term = term->next;
218  }
219
220  if (delete) {
221    if (parent)
222      parent->next = delete->next;
223    else
224      diction->tbl[bucket] = delete->next;
225
226    free(delete->name);
227    free(delete);
228    diction->items--;
229  }
230  return;
231}
232
233ds_cursor_t
234ds_diction_cursor (ds_diction_t diction)
235{
236  ds_cursor_t cur = (ds_cursor_t) calloc(1, sizeof(struct _ds_diction_c));
237
238  if (!cur) {
239    perror("ds_diction_cursor: calloc() failed");
240    return NULL;
241  }
242  cur->diction    = diction;
243  cur->iter_index = 0;
244  cur->iter_next  = NULL;
245  return cur;
246}
247
248ds_term_t
249ds_diction_next (ds_cursor_t cur)
250{
251  unsigned long bucket;
252  ds_term_t term;
253  ds_term_t tbl_term;
254
255  if (!cur)
256    return NULL;
257
258  term = cur->iter_next;
259  if (term) {
260    cur->iter_next = term->next;
261    return term;
262  }
263
264  while (cur->iter_index < cur->diction->size) {
265    bucket = cur->iter_index;
266    cur->iter_index++;
267    tbl_term = cur->diction->tbl[bucket];
268    if (tbl_term) {
269      cur->iter_next = tbl_term->next;
270      return (tbl_term);
271    }
272  }
273
274  return NULL;
275}
276
277void
278ds_diction_close (ds_cursor_t cur)
279{
280  free(cur);
281  return;
282}
283
284int
285ds_diction_setstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
286{
287  ds_term_t term = ds_diction_find(diction, key);
288
289  if (term) {
290    term->s.probability = s->probability;
291    term->s.spam_hits = s->spam_hits;
292    term->s.innocent_hits = s->innocent_hits;
293    term->s.status = s->status;
294    term->s.offset = s->offset;
295    return 0;
296  }
297  return -1;
298}
299
300int ds_diction_addstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
301{
302  ds_term_t term = ds_diction_find(diction, key);
303
304  if (term) {
305    term->s.probability += s->probability;
306    term->s.spam_hits += s->spam_hits;
307    term->s.innocent_hits += s->innocent_hits;
308    if (!term->s.offset)
309      term->s.offset = s->offset;
310    if (s->status & TST_DISK)
311      term->s.status |= TST_DISK;
312    if (s->status & TST_DIRTY)
313      term->s.status |= TST_DIRTY;
314    return 0;
315  }
316  return -1;
317}
318
319int
320ds_diction_getstat  (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
321{
322  ds_term_t term = ds_diction_find(diction, key);
323
324  if (term) {
325    s->probability = term->s.probability;
326    s->spam_hits = term->s.spam_hits;
327    s->innocent_hits = term->s.innocent_hits;
328    s->status = term->s.status;
329    s->offset = term->s.offset;
330    return 0;
331  }
332  return -1;
333}
334
Note: See TracBrowser for help on using the repository browser.