/* $Id: bnr.c,v 1.32 2011/06/28 00:13:48 sbajic Exp $ */
/*
DSPAM
COPYRIGHT (C) 2002-2012 DSPAM PROJECT
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* bnr.c - bayesian noise reduction - contextual symmetry logic
*
* http://bnr.nuclearelephant.com
*
*/
#ifdef HAVE_CONFIG_H
#include
#endif
#include
#include
#include
#include
#include
#ifdef HAVE_UNISTD_H
#include
#endif
#include
#include
#include
#include "bnr.h"
/*
 * bnr_init(): Allocate a noise reduction context and set its defaults
 *
 * The context is zero-filled by calloc(), then given a window size of 3,
 * an exclusionary radius of 0.25, an inclusionary radius of 0.33, an empty
 * token stream and an empty pattern hash.
 *
 * parameters: type (int)         BNR_CHAR:  Token identifiers as character arrays
 *                                BNR_INDEX: Token identifiers as pointers
 *             identifier (char)  A character added to each pattern name to
 *                                identify the type of stream
 *
 * returns: pointer to the new context, or NULL if any allocation failed
 *          (a message is written with perror() in that case)
 */
BNR_CTX *bnr_init(int type, char identifier)
{
  BNR_CTX *ctx = calloc(1, sizeof(BNR_CTX));

  if (!ctx) {
    perror("memory allocation error: bnr_init() failed");
    return NULL;
  }

  ctx->identifier  = identifier;
  ctx->window_size = 3;
  ctx->ex_radius   = 0.25;
  ctx->in_radius   = 0.33;
  ctx->stream      = bnr_list_create(type);
  ctx->patterns    = bnr_hash_create(1543ul);

  /* Both containers must exist for the context to be usable. */
  if (ctx->stream != NULL && ctx->patterns != NULL) {
    return ctx;
  }

  /* One (or both) of the containers could not be created: unwind. */
  perror("memory allocation error: bnr_init() failed");
  bnr_list_destroy(ctx->stream);
  bnr_hash_destroy(ctx->patterns);
  free(ctx);
  return NULL;
}
/*
 * bnr_destroy(): Destroys a noise reduction context no longer being used
 *
 * Releases the token stream, the pattern hash and the context itself.
 * Passing NULL is a harmless no-op (like free(NULL)), so callers may
 * invoke this unconditionally on the result of a failed bnr_init().
 *
 * parameters: BTX (BNR_CTX *) The context to destroy (may be NULL)
 * returns: 0 on success
 */
int bnr_destroy(BNR_CTX *BTX) {
  /* bnr_init() returns NULL on failure; do not dereference it here. */
  if (BTX == NULL) {
    return 0;
  }

  bnr_list_destroy(BTX->stream);
  bnr_hash_destroy(BTX->patterns);
  free(BTX);
  return 0;
}
/*
 * bnr_add(): Appends one token to the noise reduction stream. Call it once
 * for every token in the message body, in the order the tokens occur.
 *
 * parameters: BTX (BNR_CTX *)  The noise reduction context to use
 *             token (void *)   The token's name, or a pointer when the
 *                              context was created with BNR_INDEX
 *             value (float)    The token's probability
 * returns: 0 on success, EFAILURE if the list insertion returned NULL
 */
int bnr_add(BNR_CTX *BTX, void *token, float value) {
  if (bnr_list_insert(BTX->stream, token, value) == NULL) {
    return EFAILURE;
  }

  return 0;
}
/*
 * bnr_instantiate(): Instantiates a series of patterns for the given stream.
 * This function should be called after all tokens are added to the stream.
 *
 * Iterates over BTX->stream with a private cursor (c_list) and, for every
 * node, starts a pattern name of the form "bnr.<identifier>|" in bnr_token.
 *
 * NOTE(review): several lines below are visibly truncated -- the text that
 * followed each '<' (loop conditions, loop bodies, the start of some calls)
 * appears to have been lost when this file was extracted. As shown, the
 * function does not compile; restore the missing text from the upstream
 * source before relying on any of the per-line notes below.
 *
 * parameters: BTX (BNR_CTX *) The noise reduction context to use
 * returns: 0 on success
 */
int bnr_instantiate(BNR_CTX *BTX) {
/* Window length comes from the context and sizes the array below. */
int BNR_SIZE = BTX->window_size;
float previous_bnr_probs[BNR_SIZE];
struct bnr_list_node *node_list;
struct bnr_list_c c_list;
/* Buffer for the pattern name; written with an unbounded sprintf() below. */
char bnr_token[64];
int i;
/* NOTE(review): truncated line. Presumably an initialisation loop over the
 * window followed by fetching the first stream node into node_list --
 * confirm against upstream. */
for(i=0;istream, &c_list);
while(node_list != NULL) {
/* NOTE(review): truncated line. Only the tail "value);" survives;
 * presumably the window is shifted and the current node's value stored --
 * confirm against upstream. */
for(i=1;ivalue);
/* Pattern names begin with "bnr.", the context's identifier and '|'. */
sprintf(bnr_token, "bnr.%c|", BTX->identifier);
/* NOTE(review): truncated line. The surviving tail passes bnr_token
 * together with a "patterns" member to some call; presumably the window
 * values are appended to the name first -- confirm against upstream. */
for(i=0;ipatterns, bnr_token);
/* Advance the private cursor to the next token in the stream. */
node_list = c_bnr_list_next(BTX->stream, &c_list);
}
return 0;
}
/*
 * bnr_get_pattern(): Fetches the next instantiated pattern name.
 * Intended for use after bnr_instantiate(): call repeatedly to walk every
 * pattern, look each one up in your classifier, and report its value back
 * with bnr_set_pattern().
 *
 * The iteration position is kept inside the context (pattern_iter and
 * c_pattern). When the patterns are exhausted the position is reset, so a
 * later call starts again from the first pattern.
 *
 * parameters: BTX (BNR_CTX *) The noise reduction context to use
 * returns: The name of the next instantiated pattern in the context, or
 *          NULL when there are no more patterns
 */
char *bnr_get_pattern(BNR_CTX *BTX) {
  struct bnr_hash_node *cur;

  if (BTX->pattern_iter) {
    /* Iteration already in progress: step forward. */
    cur = c_bnr_hash_next(BTX->patterns, &BTX->c_pattern);
  } else {
    /* Fresh iteration: position on the first pattern. */
    cur = c_bnr_hash_first(BTX->patterns, &BTX->c_pattern);
    BTX->pattern_iter = 1;
  }

  if (cur == NULL) {
    BTX->pattern_iter = 0;
    return NULL;
  }

  return cur->name;
}
/*
 * bnr_set_pattern(): Stores the value of a pattern in the context.
 * Call once per instantiated pattern: obtain each name with repeated calls
 * to bnr_get_pattern(), look its value up in the classifier, and hand the
 * result to this function.
 *
 * parameters: BTX (BNR_CTX *)     The noise reduction context to use
 *             name (const char *) The name of the pattern to set
 *             value (float)       The p-value of the pattern
 * returns: 0 on success (the status of bnr_hash_set() is passed through)
 */
int bnr_set_pattern(BNR_CTX *BTX, const char *name, float value) {
  int status;

  status = bnr_hash_set(BTX->patterns, name, value);
  return status;
}
/*
 * bnr_get_token(): Fetches the next token from the stream.
 * Intended for use after bnr_finalize(). Every token in the stream is
 * returned in turn, eliminated or not; the elimination status of the
 * returned token is reported through the passed-in variable.
 *
 * The iteration position is kept inside the context (stream_iter and
 * c_stream). When the stream is exhausted the position is reset, so a
 * later call starts again from the first token.
 *
 * parameters: BTX (BNR_CTX *)    The noise reduction context to use
 *             eliminated (int *) Set to 1 if the returned token was
 *                                eliminated, 0 otherwise; left untouched
 *                                when NULL is returned
 * returns: The name (or pointer) of the next token, or NULL at end of stream
 */
void *bnr_get_token(BNR_CTX *BTX, int *eliminated) {
  struct bnr_list_node *cur;

  if (BTX->stream_iter != 0) {
    /* Iteration already in progress: step forward. */
    cur = c_bnr_list_next(BTX->stream, &BTX->c_stream);
  } else {
    /* Fresh iteration: position on the first token. */
    BTX->stream_iter = 1;
    cur = c_bnr_list_first(BTX->stream, &BTX->c_stream);
  }

  if (cur == NULL) {
    BTX->stream_iter = 0;
    return NULL;
  }

  *eliminated = cur->eliminated ? 1 : 0;
  return cur->ptr;
}
/*
 * _bnr_round(): [internal] Snap a value onto the 0.05 grid
 *
 * The value is first truncated to whole hundredths, then moved upward
 * (toward positive infinity) to the next multiple of 0.05 unless it already
 * is one. Note that this is not round-to-nearest: 0.51 becomes 0.55.
 *
 * parameters: n (float) Value to be rounded
 * returns: Rounded value as a float
 */
float _bnr_round(float n) {
  int hundredths = (int)(n * 100);
  int rem = hundredths % 5;

  if (rem > 0) {
    /* Positive remainder: climb to the next multiple of 5. */
    hundredths += 5 - rem;
  } else if (rem < 0) {
    /* In C the remainder takes the sign of the dividend, so for negative
     * inputs the next multiple of 5 upward is |rem| steps away. */
    hundredths -= rem;
  }

  return (float)(hundredths / 100.0);
}
/*
 * bnr_finalize() Finalizes the noise reduction context and performs dubbing
 * This function should be called after all calls to bnr_set_pattern() have
 * completed. This function performs the actual noise reduction process
 * after which calls to bnr_get_token() may be called.
 *
 * Iterates over BTX->stream with a private cursor (c_list). For each node a
 * pattern name starting with "bnr.<identifier>|" is built and a pattern
 * value obtained; a pattern counts as "interesting" when its value lies
 * further than BTX->ex_radius from 0.5. Tokens flagged by the elimination
 * test get eliminated = 1 and BTX->eliminations is incremented.
 *
 * NOTE(review): several lines below are visibly truncated -- the text that
 * followed each '<' (loop conditions, loop bodies, the start of some calls)
 * appears to have been lost when this file was extracted, and the visible
 * braces no longer balance. As shown, the function does not compile;
 * restore the missing text from the upstream source before relying on any
 * of the per-line notes below.
 *
 * parameters: BTX (BNR_CTX *) The noise reduction context to use
 * returns: 0 on success
 */
int bnr_finalize(BNR_CTX *BTX) {
/* Window length comes from the context and sizes both arrays below. */
int BNR_SIZE = BTX->window_size;
/* Stream nodes currently inside the window. */
struct bnr_list_node * previous_bnr_tokens[BNR_SIZE];
float previous_bnr_probs[BNR_SIZE];
struct bnr_list_node *node_list;
struct bnr_list_c c_list;
/* Buffer for the pattern name; written with an unbounded sprintf() below. */
char bnr_token[64];
int i, interesting;
/* NOTE(review): truncated line. Presumably an initialisation loop over the
 * window followed by fetching the first stream node into node_list --
 * confirm against upstream. */
for(i=0;istream, &c_list);
while(node_list != NULL) {
float pattern_value;
/* NOTE(review): truncated line. Only the tail "value);" survives;
 * presumably the window is shifted and the current node's value stored --
 * confirm against upstream. */
for(i=1;ivalue);
/* The newest window slot refers to the node being visited. */
previous_bnr_tokens[BNR_SIZE-1] = node_list;
/* Pattern names begin with "bnr.", the context's identifier and '|'. */
sprintf(bnr_token, "bnr.%c|", BTX->identifier);
/* NOTE(review): truncated line. The surviving tail passes bnr_token
 * together with a "patterns" member to some call; presumably this is where
 * pattern_value is assigned -- confirm against upstream. */
for(i=0;ipatterns, bnr_token);
/* A pattern is interesting when its value is more than ex_radius from 0.5. */
interesting = (fabs(0.5-pattern_value) > BTX->ex_radius);
if (interesting) {
#ifdef LIBBNR_VERBOSE_DEBUG
fprintf(stderr, "Analyzing Pattern '%s' P-Value: %1.5f\n", bnr_token,
pattern_value);
#endif
/* Eliminate inconsistent tokens */
/* NOTE(review): truncated line. The surviving tail compares something
 * involving "value - pattern_value" against BTX->in_radius; presumably the
 * distance between a window token's value and the pattern value --
 * confirm against upstream. */
for(i=0;ivalue - pattern_value) > BTX->in_radius)
{
#ifdef LIBBNR_VERBOSE_DEBUG
fprintf(stderr, "\tEliminating '%s' P-Value: %1.5f\n",
(const char *) previous_bnr_tokens[i]->ptr,
previous_bnr_tokens[i]->value);
#endif
/* Count the elimination and flag the token; bnr_get_token() reports the
 * flag to its caller. */
BTX->eliminations++;
previous_bnr_tokens[i]->eliminated = 1;
}
}
}
}
/* Advance the private cursor to the next token in the stream. */
node_list = c_bnr_list_next(BTX->stream, &c_list);
}
return 0;
}