/* $Id: diction.c,v 1.16 2011/06/28 00:13:48 sbajic Exp $ */
/*
DSPAM
COPYRIGHT (C) 2002-2012 DSPAM PROJECT
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
/*
* diction.c - subset of lexical data
*
* DESCRIPTION
* a diction is a subset of lexical data from a user's dictionary. in the
* context used within DSPAM, a diction is all of the matching lexical
* information from the current message being processed. the diction is
* loaded/stored by the storage driver and managed primarily by libdspam.
*/
#ifdef HAVE_CONFIG_H
#include
#endif
#include
#include
#include
#include "diction.h"
static unsigned long _ds_prime_list[] = {
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
ds_diction_t
ds_diction_create (unsigned long size)
{
ds_diction_t diction = (ds_diction_t) calloc(1, sizeof(struct _ds_diction));
int i = 0;
if (!diction) {
perror("ds_diction_create: calloc() failed");
return NULL;
}
while (_ds_prime_list[i] < size)
{ i++; }
diction->size = _ds_prime_list[i];
diction->items = 0;
diction->tbl =
(struct _ds_term **) calloc(diction->size, sizeof (struct _ds_term *));
if (!diction->tbl)
{
perror("ds_diction_create: calloc() failed");
free(diction);
return NULL;
}
diction->order = nt_create(NT_INDEX);
diction->chained_order = nt_create(NT_INDEX);
if (!diction->order || !diction->chained_order) {
nt_destroy(diction->order);
nt_destroy(diction->chained_order);
free(diction->tbl);
free(diction);
return NULL;
}
return diction;
}
void
ds_diction_destroy (ds_diction_t diction)
{
ds_term_t term, next;
ds_cursor_t cur;
if (!diction) return;
cur = ds_diction_cursor(diction);
if (!cur) {
perror("ds_diction_destroy: ds_diction_cursor() failed");
return;
}
term = ds_diction_next(cur);
while(term)
{
next = ds_diction_next(cur);
ds_diction_delete(diction, term->key);
term = next;
}
ds_diction_close(cur);
nt_destroy(diction->order);
nt_destroy(diction->chained_order);
free(diction->tbl);
free(diction);
return;
}
ds_term_t
ds_diction_term_create (ds_key_t key, const char *name)
{
ds_term_t term = (ds_term_t) calloc(1, sizeof(struct _ds_term));
if (!term) {
perror("ds_diction_term_create: calloc() failed");
} else {
term->key = key;
term->frequency = 1;
term->type = 'D';
if (name)
term->name = strdup(name);
}
return term;
}
ds_term_t
ds_diction_find (ds_diction_t diction, ds_key_t key)
{
ds_term_t term;
term = diction->tbl[key % diction->size];
while (term)
{
if (key == term->key)
return term;
term = term->next;
}
return NULL;
}
ds_term_t
ds_diction_touch(
ds_diction_t diction,
ds_key_t key,
const char *name,
int flags)
{
unsigned long bucket = key % diction->size;
ds_term_t parent = NULL;
ds_term_t insert = NULL;
ds_term_t term;
term = diction->tbl[bucket];
while (term) {
if (key == term->key) {
insert = term;
break;
}
parent = term;
term = term->next;
}
if (!insert) {
insert = ds_diction_term_create(key, name);
if (!insert) {
perror("ds_diction_touch: ds_diction_term_create() failed");
return NULL;
}
diction->items++;
if (parent)
parent->next = insert;
else
diction->tbl[bucket] = insert;
} else {
if (!insert->name && name)
insert->name = strdup(name);
insert->frequency++;
}
if (flags & DSD_CONTEXT) {
if (flags & DSD_CHAINED)
nt_add(diction->chained_order, insert);
else
nt_add(diction->order, insert);
}
return insert;
}
void
ds_diction_delete(ds_diction_t diction, ds_key_t key)
{
unsigned long bucket = key % diction->size;
ds_term_t parent = NULL;
ds_term_t delete = NULL;
ds_term_t term;
term = diction->tbl[bucket];
while(term) {
if (key == term->key) {
delete = term;
break;
}
parent = term;
term = term->next;
}
if (delete) {
if (parent)
parent->next = delete->next;
else
diction->tbl[bucket] = delete->next;
free(delete->name);
free(delete);
diction->items--;
}
return;
}
ds_cursor_t
ds_diction_cursor (ds_diction_t diction)
{
ds_cursor_t cur = (ds_cursor_t) calloc(1, sizeof(struct _ds_diction_c));
if (!cur) {
perror("ds_diction_cursor: calloc() failed");
return NULL;
}
cur->diction = diction;
cur->iter_index = 0;
cur->iter_next = NULL;
return cur;
}
ds_term_t
ds_diction_next (ds_cursor_t cur)
{
unsigned long bucket;
ds_term_t term;
ds_term_t tbl_term;
if (!cur)
return NULL;
term = cur->iter_next;
if (term) {
cur->iter_next = term->next;
return term;
}
while (cur->iter_index < cur->diction->size) {
bucket = cur->iter_index;
cur->iter_index++;
tbl_term = cur->diction->tbl[bucket];
if (tbl_term) {
cur->iter_next = tbl_term->next;
return (tbl_term);
}
}
return NULL;
}
void
ds_diction_close (ds_cursor_t cur)
{
free(cur);
return;
}
int
ds_diction_setstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
{
ds_term_t term = ds_diction_find(diction, key);
if (term) {
term->s.probability = s->probability;
term->s.spam_hits = s->spam_hits;
term->s.innocent_hits = s->innocent_hits;
term->s.status = s->status;
term->s.offset = s->offset;
return 0;
}
return -1;
}
int ds_diction_addstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
{
ds_term_t term = ds_diction_find(diction, key);
if (term) {
term->s.probability += s->probability;
term->s.spam_hits += s->spam_hits;
term->s.innocent_hits += s->innocent_hits;
if (!term->s.offset)
term->s.offset = s->offset;
if (s->status & TST_DISK)
term->s.status |= TST_DISK;
if (s->status & TST_DIRTY)
term->s.status |= TST_DIRTY;
return 0;
}
return -1;
}
int
ds_diction_getstat (ds_diction_t diction, ds_key_t key, ds_spam_stat_t s)
{
ds_term_t term = ds_diction_find(diction, key);
if (term) {
s->probability = term->s.probability;
s->spam_hits = term->s.spam_hits;
s->innocent_hits = term->s.innocent_hits;
s->status = term->s.status;
s->offset = term->s.offset;
return 0;
}
return -1;
}