/* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */ /* DSPAM COPYRIGHT (C) 2002-2012 DSPAM PROJECT This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ /* * decode.c - message decoding and parsing * * DESCRIPTION * This set of functions performs parsing and decoding of a message and * embeds its components into a ds_message_t structure, suitable for * logical access. */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include "decode.h" #include "error.h" #include "util.h" #include "language.h" #include "buffer.h" #include "base64.h" #include "libdspam.h" /* * _ds_actualize_message (const char *message) * * DESCRIPTION * primary message parser * * this function performs all decoding and actualization of the message * into the message structures defined in the .h * * INPUT ARGUMENTS * message message to decode * * RETURN VALUES * pointer to an allocated message structure (ds_message_t), NULL on failure */ ds_message_t _ds_actualize_message (const char *message) { char *line = NULL; char *in = NULL; char *m_in = NULL; ds_message_part_t current_block; ds_header_t current_heading = NULL; struct nt *boundaries = NULL; ds_message_t out = NULL; int block_position = BP_HEADER; int in_content = 0; if (!message || !(*message)) goto MEMFAIL; if (!(in = strdup(message))) goto MEMFAIL; m_in = in; boundaries = nt_create (NT_CHAR); if (!boundaries) goto MEMFAIL; out = (ds_message_t) calloc (1, sizeof (struct _ds_message)); if (!out) goto MEMFAIL; out->components = nt_create (NT_PTR); if (!out->components) goto MEMFAIL; current_block = _ds_create_message_part (); if (!current_block) goto MEMFAIL; if (nt_add (out->components, (void *) current_block) == NULL) goto MEMFAIL; /* Read the message from memory */ line = strsep (&in, "\n"); while (line) { /* Header processing */ if (block_position == BP_HEADER) { /* If we see two boundaries converged on top of one another */ if (_ds_match_boundary (boundaries, line)) { /* Add the boundary as the terminating boundary */ current_block->terminating_boundary = strdup (line + 2); current_block->original_encoding = current_block->encoding; _ds_decode_headers(current_block); current_block = _ds_create_message_part (); if (!current_block) goto MEMFAIL; if (nt_add (out->components, (void *) current_block) == NULL) goto MEMFAIL; block_position = BP_HEADER; } /* Concatenate multiline headers to the original header field data */ else if (line[0] == 32 || line[0] == '\t') { if (current_heading) { char *eow, *ptr; ptr = realloc (current_heading->data, strlen (current_heading->data) + strlen (line) + 2); if (ptr) { current_heading->data = ptr; strcat (current_heading->data, "\n"); strcat (current_heading->data, line); } else { goto MEMFAIL; } /* Our concatenated data doesn't have any whitespace between lines */ for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { } ptr = realloc (current_heading->concatenated_data, strlen (current_heading->concatenated_data) + strlen (eow) + 1); if (ptr) { current_heading->concatenated_data = ptr; strcat (current_heading->concatenated_data, eow); } else { goto MEMFAIL; } if (current_heading->original_data) { ptr = realloc (current_heading->original_data, strlen (current_heading->original_data) + strlen (line) + 2); if (ptr) { current_heading->original_data = ptr; strcat (current_heading->original_data, "\n"); strcat (current_heading->original_data, line); } else { goto MEMFAIL; } } _ds_analyze_header (current_block, current_heading, boundaries); } } /* New header field when LF or CRLF is not found */ else if (line[0] != 0 && line[0] != 13) { ds_header_t header = _ds_create_header_field (line); if (header != NULL) { _ds_analyze_header (current_block, header, boundaries); current_heading = header; nt_add (current_block->headers, header); } /* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */ } else { block_position = BP_BODY; } } /* Body processing */ else if (block_position == BP_BODY) { /* Look for a boundary in the header of a part */ if (!strncasecmp (line, "Content-Type", 12) || ((line[0] == 32 || line[0] == 9) && in_content)) { char boundary[128]; in_content = 1; if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) { if (!_ds_match_boundary (boundaries, boundary)) { _ds_push_boundary (boundaries, boundary); free(current_block->boundary); current_block->boundary = strdup (boundary); } } else { _ds_push_boundary (boundaries, ""); } } else { in_content = 0; } /* Multipart boundary was reached; move onto next block */ if (_ds_match_boundary (boundaries, line)) { /* Add the boundary as the terminating boundary */ current_block->terminating_boundary = strdup (line + 2); current_block->original_encoding = current_block->encoding; _ds_decode_headers(current_block); current_block = _ds_create_message_part (); if (!current_block) goto MEMFAIL; if (nt_add (out->components, (void *) current_block) == NULL) goto MEMFAIL; block_position = BP_HEADER; } /* Plain old message (or part) body */ else { buffer_cat (current_block->body, line); /* Don't add extra \n at the end of message's body */ if (in != NULL) buffer_cat (current_block->body, "\n"); } } line = strsep (&in, "\n"); } /* while (line) */ _ds_decode_headers(current_block); free (m_in); nt_destroy (boundaries); return out; MEMFAIL: if (m_in) free(m_in); if (boundaries) nt_destroy (boundaries); if (out) _ds_destroy_message(out); LOG (LOG_CRIT, ERR_MEM_ALLOC); return NULL; } /* * _ds_create_message_part * * DESCRIPTION * create and initialize a new message block component * * RETURN VALUES * pointer to an allocated message block (ds_message_part_t), NULL on failure * */ ds_message_part_t _ds_create_message_part (void) { ds_message_part_t block = (ds_message_part_t) calloc (1, sizeof (struct _ds_message_part)); if (!block) goto MEMFAIL; block->headers = nt_create (NT_PTR); if (!block->headers) goto MEMFAIL; block->body = buffer_create (NULL); if (!block->body) goto MEMFAIL; block->encoding = EN_UNKNOWN; block->media_type = MT_TEXT; block->media_subtype = MST_PLAIN; block->original_encoding = EN_UNKNOWN; block->content_disposition = PCD_UNKNOWN; /* Not really necessary, but.. */ block->boundary = NULL; block->terminating_boundary = NULL; block->original_signed_body = NULL; return block; MEMFAIL: if (block) { buffer_destroy(block->body); nt_destroy(block->headers); free(block); } LOG (LOG_CRIT, ERR_MEM_ALLOC); return NULL; } /* * _ds_create_header_field(const char *heading) * * DESCRIPTION * create and initialize a new header structure * * INPUT ARGUMENTS * heading plain text heading (e.g. "To: Mom") * * RETURN VALUES * pointer to an allocated header structure (ds_header_t), NULL on failure */ ds_header_t _ds_create_header_field (const char *heading) { char *in = strdup(heading); char *ptr, *m = in, *data; ds_header_t header = (ds_header_t) calloc (1, sizeof (struct _ds_header_field)); if (!header || !in) goto MEMFAIL; ptr = strsep (&in, ":"); if (ptr) { header->heading = strdup (ptr); if (!header->heading) goto MEMFAIL; else { if (!in) { LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't " "contains `:' character", __FILE__, __LINE__, header->heading); /* Use empty string as data as fallback for comtinue processing. */ in = ""; } else { /* Skip white space */ while (*in == 32 || *in == 9) ++in; } data = strdup (in); if (!data) goto MEMFAIL; header->data = data; header->concatenated_data = strdup(data); } } free (m); return header; MEMFAIL: free(header); free(m); LOG (LOG_CRIT, ERR_MEM_ALLOC); return NULL; } /* * _ds_decode_headers (ds_message_part_t block) * * DESCRIPTION * decodes in-line encoded headers * * RETURN VALUES * returns 0 on success */ int _ds_decode_headers (ds_message_part_t block) { #ifdef VERBOSE LOGDEBUG("decoding headers in message block"); #endif char *ptr, *dptr, *rest, *enc; ds_header_t header; struct nt_node *node_nt; struct nt_c c_nt; long decoded_len; node_nt = c_nt_first(block->headers, &c_nt); while(node_nt != NULL) { long enc_offset; header = (ds_header_t) node_nt->ptr; for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++) { enc = header->concatenated_data + enc_offset; if (!strncmp(enc, "=?", 2)) { int was_null = 0; char *ptrptr, *decoded = NULL; long offset = (long) enc - (long) header->concatenated_data; if (header->original_data == NULL) { header->original_data = strdup(header->data); was_null = 1; } strtok_r (enc, "?", &ptrptr); strtok_r (NULL, "?", &ptrptr); ptr = strtok_r (NULL, "?", &ptrptr); dptr = strtok_r (NULL, "?", &ptrptr); if (!dptr) { if (was_null && header->original_data != NULL) free(header->original_data); if (was_null) header->original_data = NULL; continue; } rest = dptr + strlen (dptr); if (rest[0]!=0) { rest++; if (rest[0]!=0) rest++; } if (ptr != NULL && (ptr[0] == 'b' || ptr[0] == 'B')) decoded = _ds_decode_base64 (dptr); else if (ptr != NULL && (ptr[0] == 'q' || ptr[0] == 'Q')) decoded = _ds_decode_quoted (dptr); decoded_len = 0; /* Append the rest of the message */ if (decoded) { char *new_alloc; decoded_len = strlen(decoded); new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2); if (new_alloc == NULL) { LOG (LOG_CRIT, ERR_MEM_ALLOC); } else { if (offset) strncpy(new_alloc, header->concatenated_data, offset); strcat(new_alloc, decoded); strcat(new_alloc, rest); free(decoded); decoded = new_alloc; } } if (decoded) { enc_offset += (decoded_len-1); free(header->concatenated_data); header->concatenated_data = decoded; } else if (was_null && header->original_data) { free(header->original_data); header->original_data = NULL; } else if (was_null) { header->original_data = NULL; } } } if (header->original_data != NULL) { free(header->data); header->data = strdup(header->concatenated_data); } node_nt = c_nt_next(block->headers, &c_nt); } return 0; } /* * _ds_analyze_header (ds_message_part_t block, ds_header_t header, * struct nt *boundaries) * * DESCRIPTION * analyzes the header passed in and performs various operations including: * - setting media type and subtype * - setting transfer encoding * - adding newly discovered boundaries * * based on the heading specified. essentially all headers should be * analyzed for future expansion * * INPUT ARGUMENTS * block the message block to which the header belongs * header the header to analyze * boundaries a list of known boundaries found within the block */ void _ds_analyze_header ( ds_message_part_t block, ds_header_t header, struct nt *boundaries) { if (!header || !block || !header->data) return; /* Content-Type header */ if (!strcasecmp (header->heading, "Content-Type")) { int len = strlen(header->data); if (!strncasecmp (header->data, "text", 4)) { block->media_type = MT_TEXT; if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5)) block->media_subtype = MST_PLAIN; else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4)) block->media_subtype = MST_HTML; else block->media_subtype = MST_OTHER; } else if (!strncasecmp (header->data, "application", 11)) { block->media_type = MT_APPLICATION; if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15)) block->media_subtype = MST_DSPAM_SIGNATURE; else block->media_subtype = MST_OTHER; } else if (!strncasecmp (header->data, "message", 7)) { block->media_type = MT_MESSAGE; if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6)) block->media_subtype = MST_RFC822; else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11)) block->media_subtype = MST_INOCULATION; else block->media_subtype = MST_OTHER; } else if (!strncasecmp (header->data, "multipart", 9)) { char boundary[128]; block->media_type = MT_MULTIPART; if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5)) block->media_subtype = MST_MIXED; else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11)) block->media_subtype = MST_ALTERNATIVE; else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6)) block->media_subtype = MST_SIGNED; else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9)) block->media_subtype = MST_ENCRYPTED; else block->media_subtype = MST_OTHER; if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) { if (!_ds_match_boundary (boundaries, boundary)) { _ds_push_boundary (boundaries, boundary); free(block->boundary); block->boundary = strdup (boundary); } } else { _ds_push_boundary (boundaries, ""); } } else { block->media_type = MT_OTHER; block->media_subtype = MST_OTHER; } } /* Content-Transfer-Encoding */ else if (!strcasecmp (header->heading, "Content-Transfer-Encoding")) { if (!strncasecmp (header->data, "7bit", 4)) block->encoding = EN_7BIT; else if (!strncasecmp (header->data, "8bit", 4)) block->encoding = EN_8BIT; else if (!strncasecmp (header->data, "quoted-printable", 16)) block->encoding = EN_QUOTED_PRINTABLE; else if (!strncasecmp (header->data, "base64", 6)) block->encoding = EN_BASE64; else if (!strncasecmp (header->data, "binary", 6)) block->encoding = EN_BINARY; else block->encoding = EN_OTHER; } if (!strcasecmp (header->heading, "Content-Disposition")) { if (!strncasecmp (header->data, "inline", 6)) block->content_disposition = PCD_INLINE; else if (!strncasecmp (header->data, "attachment", 10)) block->content_disposition = PCD_ATTACHMENT; else block->content_disposition = PCD_OTHER; } return; } /* * _ds_destroy_message (ds_message_t message) * * DESCRIPTION * destroys a message structure (ds_message_t) * * INPUT ARGUMENTS * message the message structure to be destroyed */ void _ds_destroy_message (ds_message_t message) { struct nt_node *node_nt; struct nt_c c; if (message == NULL) return; if (message->components) { node_nt = c_nt_first (message->components, &c); while (node_nt != NULL) { ds_message_part_t block = (ds_message_part_t) node_nt->ptr; _ds_destroy_block(block); node_nt = c_nt_next (message->components, &c); } nt_destroy (message->components); } free (message); return; } /* * _ds_destroy_headers (ds_message_part_t block) * * DESCRIPTION * destroys a message block's header pairs * does not free the structures themselves; these are freed at nt_destroy * * INPUT ARGUMENTS * block the message block containing the headers to destsroy */ void _ds_destroy_headers (ds_message_part_t block) { struct nt_node *node_nt; struct nt_c c; if (!block || !block->headers) return; node_nt = c_nt_first (block->headers, &c); while (node_nt != NULL) { ds_header_t field = (ds_header_t) node_nt->ptr; if (field) { free (field->original_data); free (field->heading); free (field->concatenated_data); free (field->data); } node_nt = c_nt_next (block->headers, &c); } return; } /* * _ds_destroy_block (ds_message_part_t block) * * DESCRIPTION * destroys a message block * * INPUT ARGUMENTS * block the message block to destroy */ void _ds_destroy_block (ds_message_part_t block) { if (!block) return; if (block->headers) { _ds_destroy_headers (block); nt_destroy (block->headers); } buffer_destroy (block->body); buffer_destroy (block->original_signed_body); free (block->boundary); free (block->terminating_boundary); // free (block); return; } /* * _ds_decode_block (ds_message_part_t block) * * DESCRIPTION * decodes a message block * * INPUT ARGUMENTS * block the message block to decode * * RETURN VALUES * a pointer to the allocated character array containing the decoded message * NULL on failure */ char * _ds_decode_block (ds_message_part_t block) { if (block->encoding == EN_BASE64) return _ds_decode_base64 (block->body->data); else if (block->encoding == EN_QUOTED_PRINTABLE) return _ds_decode_quoted (block->body->data); LOG (LOG_WARNING, "decoding of block encoding type %d not supported", block->encoding); return NULL; } /* * _ds_decode_{base64,quoted,hex8bit} * * DESCRIPTION * supporting block decoder functions * these function call (or perform) specific decoding functions * * INPUT ARGUMENTS * body encoded message body * * RETURN VALUES * a pointer to the allocated character array containing the decoded body */ char * _ds_decode_base64 (const char *body) { if (body == NULL) return NULL; return base64decode (body); } char * _ds_decode_quoted (const char *body) { #ifdef VERBOSE LOGDEBUG("decoding Quoted Printable encoded buffer"); #endif if (!body) return NULL; char *n, *out; const char *end, *p; n = out = malloc(strlen(body)+1); end = body + strlen(body); if (out == NULL) { LOG (LOG_CRIT, ERR_MEM_ALLOC); return NULL; } for (p = body; p < end; p++, n++) { if (*p == '=') { if (p[1] == '\r' && p[2] == '\n') { n -= 1; p += 2; } else if (p[1] == '\n') { n -= 1; p += 1; } else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) { *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2])); p += 2; } else *n = *p; } else *n = *p; } *n = '\0'; return (char *)out; } char * _ds_decode_hex8bit (const char *body) { #ifdef VERBOSE LOGDEBUG("decoding hexadecimal 8-bit encodings in message block"); #endif if (!body) return NULL; char *n, *out; const char *end, *p; n = out = malloc(strlen(body)+1); end = body + strlen(body); if (out == NULL) { LOG (LOG_CRIT, ERR_MEM_ALLOC); return NULL; } for (p = body; p < end; p++, n++) { if (*p == '%') if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) { *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2])); p += 2; } else *n = *p; else *n = *p; } *n = '\0'; return (char *)out; } /* * _ds_encode_block (ds_message_part_t block, int encoding) * * DESCRIPTION * encodes a message block using the encoding specified and replaces the * block's message body with the encoded data * * INPUT ARGUMENTS * block the message block to encode * encoding encoding to use (EN_) * * RETURN VALUES * returns 0 on success */ int _ds_encode_block (ds_message_part_t block, int encoding) { /* we can't encode a block with the same encoding */ if (block->encoding == encoding) return EINVAL; /* we can't encode a block that's already encoded */ if (block->encoding == EN_BASE64 || block->encoding == EN_QUOTED_PRINTABLE) return EFAILURE; if (encoding == EN_BASE64) { char *encoded = _ds_encode_base64 (block->body->data); buffer_destroy (block->body); block->body = buffer_create (encoded); free (encoded); block->encoding = EN_BASE64; } else if (encoding == EN_QUOTED_PRINTABLE) { /* TODO */ return 0; } LOGDEBUG("unsupported encoding: %d", encoding); return 0; } /* * _ds_encode_{base64,quoted} * * DESCRIPTION * supporting block encoder functions * these function call (or perform) specific encoding functions * * INPUT ARGUMENTS * body decoded message body * * RETURN VALUES * a pointer to the allocated character array containing the encoded body */ char * _ds_encode_base64 (const char *body) { return base64encode (body); } /* * _ds_assemble_message (ds_message_t message) * * DESCRIPTION * assembles a message structure into a flat text message * * INPUT ARGUMENTS * message the message structure (ds_message_t) to assemble * * RETURN VALUES * a pointer to the allocated character array containing the text message */ char * _ds_assemble_message (ds_message_t message, const char *newline) { buffer *out = buffer_create (NULL); struct nt_node *node_nt, *node_header; struct nt_c c_nt, c_nt2; char *heading; char *copyback; #ifdef VERBOSE int i = 0; #endif if (!out) { LOG (LOG_CRIT, ERR_MEM_ALLOC); return NULL; } node_nt = c_nt_first (message->components, &c_nt); while (node_nt != NULL && node_nt->ptr != NULL) { ds_message_part_t block = (ds_message_part_t) node_nt->ptr; #ifdef VERBOSE LOGDEBUG ("assembling component %d", i); #endif /* Assemble headers */ if (block->headers != NULL && block->headers->items > 0) { node_header = c_nt_first (block->headers, &c_nt2); while (node_header != NULL) { char *data; ds_header_t current_header = (ds_header_t) node_header->ptr; data = (current_header->original_data == NULL) ? current_header->data : current_header->original_data; heading = malloc( ((current_header->heading) ? strlen(current_header->heading) : 0) + ((data) ? strlen(data) : 0) + 3 + strlen(newline)); if (current_header->heading != NULL && (!strncmp (current_header->heading, "From ", 5) || !strncmp (current_header->heading, "--", 2))) sprintf (heading, "%s:%s%s", (current_header->heading) ? current_header->heading : "", (data) ? data : "", newline); else sprintf (heading, "%s: %s%s", (current_header->heading) ? current_header->heading : "", (data) ? data : "", newline); buffer_cat (out, heading); free(heading); node_header = c_nt_next (block->headers, &c_nt2); } } buffer_cat (out, newline); /* Assemble bodies */ if (block->original_signed_body != NULL && message->protect) buffer_cat (out, block->original_signed_body->data); else buffer_cat (out, block->body->data); if (block->terminating_boundary != NULL) { buffer_cat (out, "--"); buffer_cat (out, block->terminating_boundary); } node_nt = c_nt_next (message->components, &c_nt); #ifdef VERBOSE i++; #endif if (node_nt != NULL && node_nt->ptr != NULL) buffer_cat (out, newline); } copyback = out->data; out->data = NULL; buffer_destroy (out); return copyback; } /* * _ds_{push,pop,match,extract}_boundary * * DESCRIPTION * these functions maintain and service a boundary "stack" on the message */ int _ds_push_boundary (struct nt *stack, const char *boundary) { char *y; if (boundary == NULL || boundary[0] == 0) return EINVAL; y = malloc (strlen (boundary) + 3); if (y == NULL) return EUNKNOWN; sprintf (y, "--%s", boundary); nt_add (stack, (char *) y); free(y); return 0; } char * _ds_pop_boundary (struct nt *stack) { struct nt_node *node, *last_node = NULL, *parent_node = NULL; struct nt_c c; char *boundary = NULL; node = c_nt_first (stack, &c); while (node != NULL) { parent_node = last_node; last_node = node; node = c_nt_next (stack, &c); } if (parent_node != NULL) parent_node->next = NULL; else stack->first = NULL; if (last_node == NULL) return NULL; boundary = strdup (last_node->ptr); free (last_node->ptr); free (last_node); return boundary; } int _ds_match_boundary (struct nt *stack, const char *buff) { struct nt_node *node; struct nt_c c; node = c_nt_first (stack, &c); while (node != NULL) { if (!strncmp (buff, node->ptr, strlen (node->ptr))) { return 1; } node = c_nt_next (stack, &c); } return 0; } int _ds_extract_boundary (char *buf, size_t size, char *mem) { char *data, *ptr, *ptrptr; if (mem == NULL) return EINVAL; data = strdup(mem); if (data == NULL) { LOG(LOG_CRIT, ERR_MEM_ALLOC); return EUNKNOWN; } for(ptr=data;ptr<(data+strlen(data));ptr++) { if (!strncasecmp(ptr, "boundary", 8)) { ptr = strchr(ptr, '='); if (ptr == NULL) { free(data); return EFAILURE; } ptr++; while(isspace((int) ptr[0])) ptr++; if (ptr[0] == '"') ptr++; strtok_r(ptr, " \";\n\t", &ptrptr); strlcpy(buf, ptr, size); free(data); return 0; } } free(data); return EFAILURE; } /* * _ds_find_header (ds_message_t message, consr char *heading) { * * DESCRIPTION * finds a header and returns its value * * INPUT ARGUMENTS * message the message structure to search * heading the heading to search for * flags optional search flags * * RETURN VALUES * a pointer to the header structure's value * */ char * _ds_find_header (ds_message_t message, const char *heading) { ds_message_part_t block; ds_header_t head; struct nt_node *node_nt; if (message->components->first) { if ((block = message->components->first->ptr)==NULL) return NULL; if (block->headers == NULL) return NULL; } else { return NULL; } node_nt = block->headers->first; while(node_nt != NULL) { head = (ds_header_t) node_nt->ptr; if (head && !strcasecmp(head->heading, heading)) { return head->data; } node_nt = node_nt->next; } return NULL; } int _ds_hex2dec(unsigned char hex) { switch (hex) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return -1; } } /* * _ds_strip_html(const char *html) * * DESCRIPTION * strip html tags from the supplied message * * INPUT ARGUMENTS * html encoded message body * * RETURN VALUES * a pointer to the allocated character array containing the * stripped message * */ char * _ds_strip_html (const char *html) { #ifdef VERBOSE LOGDEBUG("stripping HTML tags from message block"); #endif size_t j = 0, k = 0, i = 0; int visible = 1; int closing_td_tag = 0; char *html2; const char *cdata_close_tag = NULL; if(!html) return NULL; static struct { unsigned int id; char *entity; } charset[] = { { 32, " " }, { 34, """ }, { 34, """ }, { 38, "&" }, { 38, "&" }, { 39, "'" }, { 60, "<" }, { 60, "<" }, { 62, ">" }, { 62, ">" }, { 160, " " }, { 161, "¡" }, { 162, "¢" }, { 163, "£" }, { 164, "¤" }, { 165, "¥" }, { 166, "¦" }, { 167, "§" }, { 168, "¨" }, { 169, "©" }, { 170, "ª" }, { 171, "«" }, { 172, "¬" }, { 173, "­" }, { 174, "®" }, { 175, "¯" }, { 176, "°" }, { 177, "±" }, { 178, "²" }, { 179, "³" }, { 180, "´" }, { 181, "µ" }, { 182, "¶" }, { 183, "·" }, { 184, "¸" }, { 185, "¹" }, { 186, "º" }, { 187, "»" }, { 188, "¼" }, { 189, "½" }, { 190, "¾" }, { 191, "¿" }, { 192, "À" }, { 193, "Á" }, { 194, "Â" }, { 195, "Ã" }, { 196, "Ä" }, { 197, "Å" }, { 198, "Æ" }, { 199, "Ç" }, { 200, "È" }, { 201, "É" }, { 202, "Ê" }, { 203, "Ë" }, { 204, "Ì" }, { 205, "Í" }, { 206, "Î" }, { 207, "Ï" }, { 208, "Ð" }, { 209, "Ñ" }, { 210, "Ò" }, { 211, "Ó" }, { 212, "Ô" }, { 213, "Õ" }, { 214, "Ö" }, { 215, "×" }, { 216, "Ø" }, { 217, "Ù" }, { 218, "Ú" }, { 219, "Û" }, { 220, "Ü" }, { 221, "Ý" }, { 222, "Þ" }, { 223, "ß" }, { 224, "à" }, { 225, "á" }, { 226, "â" }, { 227, "ã" }, { 228, "ä" }, { 229, "å" }, { 230, "æ" }, { 231, "ç" }, { 232, "è" }, { 233, "é" }, { 234, "ê" }, { 235, "ë" }, { 236, "ì" }, { 237, "í" }, { 238, "î" }, { 239, "ï" }, { 240, "ð" }, { 241, "ñ" }, { 242, "ò" }, { 243, "ó" }, { 244, "ô" }, { 245, "õ" }, { 246, "ö" }, { 247, "÷" }, { 248, "ø" }, { 249, "ù" }, { 250, "ú" }, { 251, "û" }, { 252, "ü" }, { 253, "ý" }, { 254, "þ" }, { 255, "ÿ" }, { 338, "Œ" }, { 339, "œ" }, { 352, "Š" }, { 353, "š" }, { 376, "Ÿ" }, { 402, "ƒ" }, { 710, "ˆ" }, { 732, "˜" }, { 913, "Α" }, { 914, "Β" }, { 915, "Γ" }, { 916, "Δ" }, { 917, "Ε" }, { 918, "Ζ" }, { 919, "Η" }, { 920, "Θ" }, { 921, "Ι" }, { 922, "Κ" }, { 923, "Λ" }, { 924, "Μ" }, { 925, "Ν" }, { 926, "Ξ" }, { 927, "Ο" }, { 928, "Π" }, { 929, "Ρ" }, { 931, "Σ" }, { 932, "Τ" }, { 933, "Υ" }, { 934, "Φ" }, { 935, "Χ" }, { 936, "Ψ" }, { 937, "Ω" }, { 945, "α" }, { 946, "β" }, { 947, "γ" }, { 948, "δ" }, { 949, "ε" }, { 950, "ζ" }, { 951, "η" }, { 952, "θ" }, { 953, "ι" }, { 954, "κ" }, { 955, "λ" }, { 956, "μ" }, { 957, "ν" }, { 958, "ξ" }, { 959, "ο" }, { 960, "π" }, { 961, "ρ" }, { 962, "ς" }, { 963, "σ" }, { 964, "τ" }, { 965, "υ" }, { 966, "φ" }, { 967, "χ" }, { 968, "ψ" }, { 969, "ω" }, { 977, "&thetasym" }, { 978, "ϒ" }, { 982, "ϖ" }, {8194, " " }, {8195, " " }, { 8201, " " }, {8204, "‌" }, {8205, "‍" }, {8206, "‎" }, { 8207, "‏" }, {8211, "–" }, {8212, "—" }, {8216, "‘" }, { 8217, "’" }, {8218, "‚" }, {8220, "“" }, {8221, "”" }, { 8222, "„" }, {8224, "†" }, {8225, "‡" }, {8226, "•" }, { 8230, "…" }, {8240, "‰" }, {8242, "′" }, {8243, "″" }, { 8249, "‹" }, {8250, "›" }, {8254, "‾" }, {8260, "⁄" }, { 8364, "€" }, {8465, "ℑ" }, {8472, "℘" }, {8476, "ℜ" }, { 8482, "™" }, {8501, "ℵ" }, {8592, "←" }, {8593, "↑" }, { 8594, "→" }, {8595, "↓" }, {8596, "↔" }, {8629, "↵" }, { 8656, "⇐" }, {8657, "⇑" }, {8658, "⇒" }, {8659, "⇓" }, { 8660, "⇔" }, {8704, "∀" }, {8706, "∂" }, {8707, "∃" }, { 8709, "∅" }, {8711, "∇" }, {8712, "∈" }, {8713, "∉" }, { 8715, "∋" }, {8719, "∏" }, {8721, "∑" }, {8722, "−" }, { 8727, "∗" }, {8730, "√" }, {8733, "∝" }, {8734, "∞" }, { 8736, "∠" }, {8743, "∧" }, {8744, "∨" }, {8745, "∩" }, { 8746, "∪" }, {8747, "∫" }, {8756, "∴" }, {8764, "∼" }, { 8773, "≅" }, {8776, "≈" }, {8800, "≠" }, {8801, "≡" }, { 8804, "≤" }, {8805, "≥" }, {8834, "⊂" }, {8835, "⊃" }, { 8836, "⊄" }, {8838, "⊆" }, {8839, "⊇" }, {8853, "⊕" }, { 8855, "⊗" }, {8869, "⊥" }, {8901, "⋅" }, {8968, "⌈" }, { 8969, "⌉" }, {8970, "⌊" }, {8971, "⌋" }, {9001, "⟨" }, { 9002, "⟩" }, {9674, "◊" }, {9824, "♠" }, {9827, "♣" }, { 9829, "♥" }, {9830, "♦" } }; int num_chars = sizeof(charset) / sizeof(charset[0]); static struct { char *open_tag; char *uri_tag; } uritag[] = { { "", 5) == 0) { i += 4; closing_td_tag = 1; continue; } else if (strncasecmp(html + i, " 0 && !isspace(html2[j-1])) { html2[j++]=' '; } visible = 0; } else { closing_td_tag = 0; visible = 1; } k = i + 1; if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) || (html[k] >= 97 && html[k] <= 122) || (html[k] == 47) || (html[k] == 33) ))) { /* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */ visible = 1; html2[j++]=html[i]; i = k; const char *w = &(html[k]); while (j < len && (size_t)(w - html) < len && *w != '<') { html2[j++]=*w; w++; i++; } continue; } else if (html[k]) { /* find the end of the tag */ while (k < len && html[k] != '<' && html[k] != '>') {k++;} /* if we've got a tag with a uri, save the address to print later. */ char *url_tag = " "; int tag_offset = 0, x = 0, y = 0; for (y = 0; y < num_uri; y++) { x = strlen(uritag[y].open_tag); if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) { url_tag = uritag[y].uri_tag; tag_offset = i + x + 1; break; } } /* tag with uri found */ if (tag_offset > 0) { size_t url_start; /* start of url tag inclusive [ */ size_t url_tag_len = strlen(url_tag); char delim = ' '; /* find start of uri */ for (url_start = tag_offset; url_start <= k; url_start++) { if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) { url_start += url_tag_len; while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces before = */ if (html[url_start] == '=') { url_start++; while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */ if (html[url_start] == '"') { delim = '"'; url_start++; } else if (html[url_start] == '\'') { delim = '\''; url_start++; } else { delim = '>'; } break; } else { /* Start of uri tag found but no '=' after the tag. * Skip the whole tag. */ break; } } else if ((url_start - tag_offset) >= 50) { /* The length of the html tag is over 50 characters long without * finding the start of the url/uri. Skip the whole tag. */ break; } } /* find end of uri */ if (delim != ' ') { if (url_start < len && (strncasecmp(html + url_start, "http:", 5) == 0 || strncasecmp(html + url_start, "https:", 6) == 0 || strncasecmp(html + url_start, "ftp:", 4) == 0)) { html2[j++]=' '; const char *w = &(html[url_start]); /* html2 is a buffer of len + 1, where the +1 is for NULL * termination. This means we only want to loop to len * since we will replace html2[j] right after the loop. */ while (j < len && (size_t)(w - html) < len && *w != delim) { html2[j++]=*w; w++; } html2[j++]=' '; } } } else if (strncasecmp(html + i, "

", 3) == 0 || strncasecmp(html + i, "

", 9) == 0 || strncasecmp(html + i, "", 8) == 0) { if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') { html2[j++] = '\n'; } } else if (strncasecmp(html + i, "= 32) html2[j++] = x; i += 4; } else if (html[i+6] && html[i+6] == ';' && isdigit(html[i+2]) && isdigit(html[i+3]) && isdigit(html[i+4]) && isdigit(html[i+5])) { n[0] = html[i+2]; n[1] = html[i+3]; n[2] = html[i+4]; n[3] = html[i+5]; n[4] = 0; x = atoi(n); if (x <= 255 && x >= 32) html2[j++] = x; i += 6; } else { const char *w = &(html[i]); while (*w != ';' && *w != ' ' && *w != '\t' && *w != '\0') {i++;w++;} } visible = 0; continue; } else if (html[i] == '&') { int x = 0, y = 0; for (y = 0; y < num_chars; y++) { x = strlen(charset[y].entity); if (strncasecmp(html+i,charset[y].entity,x)==0) { if (charset[y].id <= 255) html2[j++] = charset[y].id; i += x-1; visible = 0; continue; } } } if (j < len && visible) html2[j++] = html[i]; if (j >= len) i = j = len; } html2[j] = '\0'; return (char *)html2; }