source: npl/mailserver/dspam/dspam-3.10.2/src/decode.c @ c5c522c

gcc484ntopperl-5.22
Last change on this file since c5c522c was c5c522c, checked in by Edwin Eefting <edwin@datux.nl>, 8 years ago

initial commit, transferred from cleaned syn3 svn tree

  • Property mode set to 100644
File size: 43.1 KB
Line 
1/* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */
2
3/*
4 DSPAM
5 COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU Affero General Public License as
9 published by the Free Software Foundation, either version 3 of the
10 License, or (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20*/
21
22/*
23 * decode.c - message decoding and parsing
24 *
25 *  DESCRIPTION
26 *    This set of functions performs parsing and decoding of a message and
27 *    embeds its components into a ds_message_t structure, suitable for
28 *    logical access.
29 */
30
31#ifdef HAVE_CONFIG_H
32#include <auto-config.h>
33#endif
34
35#include <stdio.h>
36#include <string.h>
37#include <stdlib.h>
38#include <ctype.h>
39
40#include "decode.h"
41#include "error.h"
42#include "util.h"
43#include "language.h"
44#include "buffer.h"
45#include "base64.h"
46#include "libdspam.h"
47
48/*
49 * _ds_actualize_message (const char *message)
50 *
51 * DESCRIPTION
52 *   primary message parser
53 *
54 *   this function performs all decoding and actualization of the message
55 *   into the message structures defined in the .h
56 *
57 * INPUT ARGUMENTS
58 *      message    message to decode
59 *
60 * RETURN VALUES
61 *   pointer to an allocated message structure (ds_message_t), NULL on failure
62 */
63
64ds_message_t
65_ds_actualize_message (const char *message)
66{
67  char *line = NULL;
68  char *in = NULL;
69  char *m_in = NULL;
70  ds_message_part_t current_block;
71  ds_header_t current_heading = NULL;
72  struct nt *boundaries = NULL;
73  ds_message_t out = NULL;
74  int block_position = BP_HEADER;
75  int in_content = 0;
76
77  if (!message || !(*message))
78    goto MEMFAIL;
79
80  if (!(in = strdup(message)))
81    goto MEMFAIL;
82
83  m_in = in;
84
85  boundaries = nt_create (NT_CHAR);
86  if (!boundaries)
87    goto MEMFAIL;
88
89  out = (ds_message_t) calloc (1, sizeof (struct _ds_message));
90  if (!out)
91    goto MEMFAIL;
92
93  out->components = nt_create (NT_PTR);
94  if (!out->components)
95    goto MEMFAIL;
96
97  current_block = _ds_create_message_part ();
98  if (!current_block)
99    goto MEMFAIL;
100
101  if (nt_add (out->components, (void *) current_block) == NULL)
102    goto MEMFAIL;
103
104  /* Read the message from memory */
105
106  line = strsep (&in, "\n");
107  while (line)
108  {
109
110    /* Header processing */
111
112    if (block_position == BP_HEADER)
113    {
114
115      /* If we see two boundaries converged on top of one another */
116
117      if (_ds_match_boundary (boundaries, line))
118      {
119
120        /* Add the boundary as the terminating boundary */
121
122        current_block->terminating_boundary = strdup (line + 2);
123        current_block->original_encoding = current_block->encoding;
124
125        _ds_decode_headers(current_block);
126        current_block = _ds_create_message_part ();
127
128        if (!current_block)
129          goto MEMFAIL;
130
131        if (nt_add (out->components, (void *) current_block) == NULL)
132          goto MEMFAIL;
133
134        block_position = BP_HEADER;
135      }
136
137      /* Concatenate multiline headers to the original header field data */
138
139      else if (line[0] == 32 || line[0] == '\t')
140      {
141        if (current_heading)
142        {
143          char *eow, *ptr;
144
145          ptr = realloc (current_heading->data,
146                         strlen (current_heading->data) + strlen (line) + 2);
147          if (ptr)
148          {
149            current_heading->data = ptr;
150            strcat (current_heading->data, "\n");
151            strcat (current_heading->data, line);
152          } else {
153            goto MEMFAIL;
154          }
155
156          /* Our concatenated data doesn't have any whitespace between lines */
157          for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { }
158
159          ptr =
160            realloc (current_heading->concatenated_data,
161              strlen (current_heading->concatenated_data) + strlen (eow) + 1);
162          if (ptr)
163          {
164            current_heading->concatenated_data = ptr;
165            strcat (current_heading->concatenated_data, eow);
166          } else {
167            goto MEMFAIL;
168          }
169
170          if (current_heading->original_data) {
171            ptr =
172              realloc (current_heading->original_data,
173                       strlen (current_heading->original_data) +
174                               strlen (line) + 2);
175            if (ptr) {
176              current_heading->original_data = ptr;
177              strcat (current_heading->original_data, "\n");
178              strcat (current_heading->original_data, line);
179            } else {
180              goto MEMFAIL;
181            }
182          }
183
184          _ds_analyze_header (current_block, current_heading, boundaries);
185        }
186      }
187
188      /* New header field when LF or CRLF is not found */
189
190      else if (line[0] != 0  && line[0] != 13)
191      {
192        ds_header_t header = _ds_create_header_field (line);
193
194        if (header != NULL)
195        {
196          _ds_analyze_header (current_block, header, boundaries);
197          current_heading = header;
198          nt_add (current_block->headers, header);
199        }
200
201
202      /* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */
203
204      } else {
205        block_position = BP_BODY;
206      }
207    }
208
209    /* Body processing */
210
211    else if (block_position == BP_BODY)
212    {
213      /* Look for a boundary in the header of a part */
214
215      if (!strncasecmp (line, "Content-Type", 12)
216            || ((line[0] == 32 || line[0] == 9) && in_content))
217      {
218        char boundary[128];
219        in_content = 1;
220        if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) {
221          if (!_ds_match_boundary (boundaries, boundary)) {
222            _ds_push_boundary (boundaries, boundary);
223            free(current_block->boundary);
224            current_block->boundary = strdup (boundary);
225          }
226        } else {
227          _ds_push_boundary (boundaries, "");
228        }
229      } else {
230        in_content = 0;
231      }
232
233      /* Multipart boundary was reached; move onto next block */
234
235      if (_ds_match_boundary (boundaries, line))
236      {
237
238        /* Add the boundary as the terminating boundary */
239
240        current_block->terminating_boundary = strdup (line + 2);
241        current_block->original_encoding = current_block->encoding;
242
243        _ds_decode_headers(current_block);
244        current_block = _ds_create_message_part ();
245
246        if (!current_block)
247          goto MEMFAIL;
248
249        if (nt_add (out->components, (void *) current_block) == NULL)
250          goto MEMFAIL;
251
252        block_position = BP_HEADER;
253      }
254
255      /* Plain old message (or part) body */
256
257      else {
258        buffer_cat (current_block->body, line);
259
260        /* Don't add extra \n at the end of message's body */
261
262        if (in != NULL)
263          buffer_cat (current_block->body, "\n");
264      }
265    }
266
267    line = strsep (&in, "\n");
268  } /* while (line) */
269
270  _ds_decode_headers(current_block);
271
272  free (m_in);
273  nt_destroy (boundaries);
274  return out;
275
276MEMFAIL:
277  if (m_in) free(m_in);
278  if (boundaries) nt_destroy (boundaries);
279  if (out) _ds_destroy_message(out);
280  LOG (LOG_CRIT, ERR_MEM_ALLOC);
281  return NULL;
282}
283
284/*
285 * _ds_create_message_part
286 *
287 * DESCRIPTION
288 *   create and initialize a new message block component
289 *
290 * RETURN VALUES
291 *   pointer to an allocated message block (ds_message_part_t), NULL on failure
292 *
293 */
294
295ds_message_part_t
296_ds_create_message_part (void)
297{
298  ds_message_part_t block =
299    (ds_message_part_t) calloc (1, sizeof (struct _ds_message_part));
300
301  if (!block)
302    goto MEMFAIL;
303
304  block->headers = nt_create (NT_PTR);
305  if (!block->headers)
306    goto MEMFAIL;
307
308  block->body = buffer_create (NULL);
309  if (!block->body)
310    goto MEMFAIL;
311
312  block->encoding   = EN_UNKNOWN;
313  block->media_type = MT_TEXT;
314  block->media_subtype     = MST_PLAIN;
315  block->original_encoding = EN_UNKNOWN;
316  block->content_disposition = PCD_UNKNOWN;
317
318  /* Not really necessary, but.. */
319
320  block->boundary = NULL;
321  block->terminating_boundary = NULL;
322  block->original_signed_body = NULL;
323
324
325  return block;
326
327MEMFAIL:
328  if (block) {
329    buffer_destroy(block->body);
330    nt_destroy(block->headers);
331    free(block);
332  }
333  LOG (LOG_CRIT, ERR_MEM_ALLOC);
334  return NULL;
335}
336
337/*
338 * _ds_create_header_field(const char *heading)
339 *
340 * DESCRIPTION
341 *   create and initialize a new header structure
342 *
343 * INPUT ARGUMENTS
344 *      heading    plain text heading (e.g. "To: Mom")
345 *
346 * RETURN VALUES
347 *   pointer to an allocated header structure (ds_header_t), NULL on failure
348 */
349
350ds_header_t
351_ds_create_header_field (const char *heading)
352{
353  char *in = strdup(heading);
354  char *ptr, *m = in, *data;
355  ds_header_t header =
356    (ds_header_t) calloc (1, sizeof (struct _ds_header_field));
357
358  if (!header || !in)
359    goto MEMFAIL;
360
361  ptr = strsep (&in, ":");
362  if (ptr) {
363    header->heading = strdup (ptr);
364    if (!header->heading)
365      goto MEMFAIL;
366    else
367    {
368      if (!in)
369      {
370        LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't "
371                 "contains `:' character", __FILE__, __LINE__, header->heading);
372
373        /* Use empty string as data as fallback for comtinue processing. */
374
375        in = "";
376      }
377      else
378      {
379        /* Skip white space */
380        while (*in == 32 || *in == 9)
381          ++in;
382      }
383
384      data = strdup (in);
385      if (!data)
386        goto MEMFAIL;
387
388      header->data = data;
389      header->concatenated_data = strdup(data);
390    }
391  }
392
393  free (m);
394  return header;
395
396MEMFAIL:
397  free(header);
398  free(m);
399  LOG (LOG_CRIT, ERR_MEM_ALLOC);
400  return NULL;
401}
402
403/*
404 * _ds_decode_headers (ds_message_part_t block)
405 *
406 * DESCRIPTION
407 *   decodes in-line encoded headers
408 *
409 * RETURN VALUES
410 *   returns 0 on success
411 */
412
413int
414_ds_decode_headers (ds_message_part_t block) {
415#ifdef VERBOSE
416  LOGDEBUG("decoding headers in message block");
417#endif
418  char *ptr, *dptr, *rest, *enc;
419  ds_header_t header;
420  struct nt_node *node_nt;
421  struct nt_c c_nt;
422  long decoded_len;
423
424  node_nt = c_nt_first(block->headers, &c_nt);
425  while(node_nt != NULL) {
426    long enc_offset;
427    header = (ds_header_t) node_nt->ptr;
428
429    for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++)
430    {
431      enc = header->concatenated_data + enc_offset;
432
433      if (!strncmp(enc, "=?", 2)) {
434        int was_null = 0;
435        char *ptrptr, *decoded = NULL;
436        long offset = (long) enc - (long) header->concatenated_data;
437
438        if (header->original_data == NULL) {
439          header->original_data = strdup(header->data);
440          was_null = 1;
441        }
442
443        strtok_r (enc, "?", &ptrptr);
444        strtok_r (NULL, "?", &ptrptr);
445        ptr = strtok_r (NULL, "?", &ptrptr);
446        dptr = strtok_r (NULL, "?", &ptrptr);
447        if (!dptr) {
448          if (was_null && header->original_data != NULL)
449            free(header->original_data);
450          if (was_null)
451            header->original_data = NULL;
452          continue;
453        }
454
455        rest = dptr + strlen (dptr);
456        if (rest[0]!=0) {
457          rest++;
458          if (rest[0]!=0) rest++;
459        }
460
461        if (ptr != NULL && (ptr[0] == 'b' || ptr[0] == 'B'))
462          decoded = _ds_decode_base64 (dptr);
463        else if (ptr != NULL && (ptr[0] == 'q' || ptr[0] == 'Q'))
464          decoded = _ds_decode_quoted (dptr);
465
466        decoded_len = 0;
467
468        /* Append the rest of the message */
469
470        if (decoded)
471        {
472          char *new_alloc;
473
474          decoded_len = strlen(decoded);
475          new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2);
476          if (new_alloc == NULL) {
477            LOG (LOG_CRIT, ERR_MEM_ALLOC);
478          }
479          else
480          {
481            if (offset)
482              strncpy(new_alloc, header->concatenated_data, offset);
483
484            strcat(new_alloc, decoded);
485            strcat(new_alloc, rest);
486            free(decoded);
487            decoded = new_alloc;
488          }
489        }
490
491        if (decoded) {
492          enc_offset += (decoded_len-1);
493          free(header->concatenated_data);
494          header->concatenated_data = decoded;
495        }
496        else if (was_null && header->original_data) {
497          free(header->original_data);
498          header->original_data = NULL;
499        }
500        else if (was_null) {
501          header->original_data = NULL;
502        }
503      }
504    }
505
506    if (header->original_data != NULL) {
507      free(header->data);
508      header->data = strdup(header->concatenated_data);
509    }
510
511    node_nt = c_nt_next(block->headers, &c_nt);
512  }
513
514  return 0;
515}
516
517/*
518 *  _ds_analyze_header (ds_message_part_t block, ds_header_t header,
519 *                      struct nt *boundaries)
520 *
521 * DESCRIPTION
522 *   analyzes the header passed in and performs various operations including:
523 *     - setting media type and subtype
524 *     - setting transfer encoding
525 *     - adding newly discovered boundaries
526 *
527 *   based on the heading specified. essentially all headers should be
528 *   analyzed for future expansion
529 *
530 * INPUT ARGUMENTS
531 *      block           the message block to which the header belongs
532 *      header          the header to analyze
533 *      boundaries      a list of known boundaries found within the block
534 */
535
536void
537_ds_analyze_header (
538  ds_message_part_t block,
539  ds_header_t header,
540  struct nt *boundaries)
541{
542  if (!header || !block || !header->data)
543    return;
544
545  /* Content-Type header */
546
547  if (!strcasecmp (header->heading, "Content-Type"))
548  {
549    int len = strlen(header->data);
550    if (!strncasecmp (header->data, "text", 4)) {
551      block->media_type = MT_TEXT;
552      if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5))
553        block->media_subtype = MST_PLAIN;
554      else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4))
555        block->media_subtype = MST_HTML;
556      else
557        block->media_subtype = MST_OTHER;
558    }
559
560    else if (!strncasecmp (header->data, "application", 11))
561    {
562      block->media_type = MT_APPLICATION;
563      if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15))
564        block->media_subtype = MST_DSPAM_SIGNATURE;
565      else
566        block->media_subtype = MST_OTHER;
567    }
568
569    else if (!strncasecmp (header->data, "message", 7))
570    {
571      block->media_type = MT_MESSAGE;
572      if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6))
573        block->media_subtype = MST_RFC822;
574      else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11))
575        block->media_subtype = MST_INOCULATION;
576      else
577        block->media_subtype = MST_OTHER;
578    }
579
580    else if (!strncasecmp (header->data, "multipart", 9))
581    {
582      char boundary[128];
583
584      block->media_type = MT_MULTIPART;
585      if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5))
586        block->media_subtype = MST_MIXED;
587      else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11))
588        block->media_subtype = MST_ALTERNATIVE;
589      else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6))
590        block->media_subtype = MST_SIGNED;
591      else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9))
592        block->media_subtype = MST_ENCRYPTED;
593      else
594        block->media_subtype = MST_OTHER;
595
596      if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) {
597        if (!_ds_match_boundary (boundaries, boundary)) {
598          _ds_push_boundary (boundaries, boundary);
599          free(block->boundary);
600          block->boundary = strdup (boundary);
601        }
602      } else {
603        _ds_push_boundary (boundaries, "");
604      }
605    }
606    else {
607      block->media_type = MT_OTHER;
608      block->media_subtype = MST_OTHER;
609    }
610
611  }
612
613  /* Content-Transfer-Encoding */
614
615  else if (!strcasecmp (header->heading, "Content-Transfer-Encoding"))
616  {
617    if (!strncasecmp (header->data, "7bit", 4))
618      block->encoding = EN_7BIT;
619    else if (!strncasecmp (header->data, "8bit", 4))
620      block->encoding = EN_8BIT;
621    else if (!strncasecmp (header->data, "quoted-printable", 16))
622      block->encoding = EN_QUOTED_PRINTABLE;
623    else if (!strncasecmp (header->data, "base64", 6))
624      block->encoding = EN_BASE64;
625    else if (!strncasecmp (header->data, "binary", 6))
626      block->encoding = EN_BINARY;
627    else
628      block->encoding = EN_OTHER;
629  }
630
631  if (!strcasecmp (header->heading, "Content-Disposition"))
632  {
633    if (!strncasecmp (header->data, "inline", 6))
634      block->content_disposition = PCD_INLINE;
635    else if (!strncasecmp (header->data, "attachment", 10))
636      block->content_disposition = PCD_ATTACHMENT;
637    else
638      block->content_disposition = PCD_OTHER;
639  }
640
641  return;
642}
643
644/*
645 * _ds_destroy_message (ds_message_t message)
646 *
647 * DESCRIPTION
648 *   destroys a message structure (ds_message_t)
649 *
650 * INPUT ARGUMENTS
651 *      message    the message structure to be destroyed
652 */
653
654void
655_ds_destroy_message (ds_message_t message)
656{
657  struct nt_node *node_nt;
658  struct nt_c c;
659
660  if (message == NULL)
661    return;
662
663  if (message->components) {
664    node_nt = c_nt_first (message->components, &c);
665    while (node_nt != NULL)
666    {
667      ds_message_part_t block = (ds_message_part_t) node_nt->ptr;
668      _ds_destroy_block(block);
669      node_nt = c_nt_next (message->components, &c);
670    }
671    nt_destroy (message->components);
672  }
673  free (message);
674  return;
675}
676
677/*
678 * _ds_destroy_headers (ds_message_part_t block)
679 *
680 * DESCRIPTION
681 *   destroys a message block's header pairs
682 *   does not free the structures themselves; these are freed at nt_destroy
683 *
684 * INPUT ARGUMENTS
685 *      block    the message block containing the headers to destroy
686 */
687
688void
689_ds_destroy_headers (ds_message_part_t block)
690{
691  struct nt_node *node_nt;
692  struct nt_c c;
693
694  if (!block || !block->headers)
695    return;
696
697  node_nt = c_nt_first (block->headers, &c);
698  while (node_nt != NULL)
699  {
700    ds_header_t field = (ds_header_t) node_nt->ptr;
701
702    if (field)
703    {
704      free (field->original_data);
705      free (field->heading);
706      free (field->concatenated_data);
707      free (field->data);
708    }
709    node_nt = c_nt_next (block->headers, &c);
710  }
711
712  return;
713}
714
715/*
716 * _ds_destroy_block (ds_message_part_t block)
717 *
718 * DESCRIPTION
719 *   destroys a message block
720 *
721 * INPUT ARGUMENTS
722 *   block   the message block to destroy
723 */
724
725void
726_ds_destroy_block (ds_message_part_t block)
727{
728  if (!block)
729    return;
730
731  if (block->headers)
732  {
733    _ds_destroy_headers (block);
734    nt_destroy (block->headers);
735  }
736  buffer_destroy (block->body);
737  buffer_destroy (block->original_signed_body);
738  free (block->boundary);
739  free (block->terminating_boundary);
740//  free (block);
741  return;
742}
743
744/*
745 * _ds_decode_block (ds_message_part_t block)
746 *
747 * DESCRIPTION
748 *   decodes a message block
749 *
750 * INPUT ARGUMENTS
751 *   block   the message block to decode
752 *
753 * RETURN VALUES
754 *   a pointer to the allocated character array containing the decoded message
755 *   NULL on failure
756 */
757
758char *
759_ds_decode_block (ds_message_part_t block)
760{
761  if (block->encoding == EN_BASE64)
762    return _ds_decode_base64 (block->body->data);
763  else if (block->encoding == EN_QUOTED_PRINTABLE)
764    return _ds_decode_quoted (block->body->data);
765
766  LOG (LOG_WARNING, "decoding of block encoding type %d not supported",
767       block->encoding);
768  return NULL;
769}
770
771/*
772 * _ds_decode_{base64,quoted,hex8bit}
773 *
774 * DESCRIPTION
775 *   supporting block decoder functions
776 *   these function call (or perform) specific decoding functions
777 *
778 * INPUT ARGUMENTS
779 *   body       encoded message body
780 *
781 * RETURN VALUES
782 *   a pointer to the allocated character array containing the decoded body
783 */
784
/*
 * Thin wrapper around base64decode() that maps a NULL body to NULL.
 */
char *
_ds_decode_base64 (const char *body)
{
  return (body != NULL) ? base64decode (body) : NULL;
}
793
794char *
795_ds_decode_quoted (const char *body)
796{
797#ifdef VERBOSE
798  LOGDEBUG("decoding Quoted Printable encoded buffer");
799#endif
800  if (!body)
801    return NULL;
802
803  char *n, *out;
804  const char *end, *p;
805
806  n = out = malloc(strlen(body)+1);
807  end = body + strlen(body);
808
809  if (out == NULL) {
810    LOG (LOG_CRIT, ERR_MEM_ALLOC);
811    return NULL;
812  }
813
814  for (p = body; p < end; p++, n++) {
815    if (*p == '=') {
816      if (p[1] == '\r' && p[2] == '\n') {
817        n -= 1;
818        p += 2;
819      } else if (p[1] == '\n') {
820        n -= 1;
821        p += 1;
822      } else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
823        *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2]));
824        p += 2;
825      } else
826        *n = *p;
827    } else
828      *n = *p;
829  }
830
831  *n = '\0';
832  return (char *)out;
833}
834
835char *
836_ds_decode_hex8bit (const char *body)
837{
838#ifdef VERBOSE
839  LOGDEBUG("decoding hexadecimal 8-bit encodings in message block");
840#endif
841  if (!body)
842    return NULL;
843
844  char *n, *out;
845  const char *end, *p;
846
847  n = out = malloc(strlen(body)+1);
848  end = body + strlen(body);
849
850  if (out == NULL) {
851    LOG (LOG_CRIT, ERR_MEM_ALLOC);
852    return NULL;
853  }
854
855  for (p = body; p < end; p++, n++) {
856    if (*p == '%')
857      if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
858        *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2]));
859        p += 2;
860      } else
861        *n = *p;
862    else
863      *n = *p;
864  }
865
866  *n = '\0';
867  return (char *)out;
868}
869
870/*
871 * _ds_encode_block (ds_message_part_t block, int encoding)
872 *
873 * DESCRIPTION
874 *   encodes a message block using the encoding specified and replaces the
875 *   block's message body with the encoded data
876 *
877 * INPUT ARGUMENTS
878 *      block       the message block to encode
879 *      encoding    encoding to use (EN_)
880 *
881 * RETURN VALUES
882 *    returns 0 on success
883 */
884
885int
886_ds_encode_block (ds_message_part_t block, int encoding)
887{
888  /* we can't encode a block with the same encoding */
889
890  if (block->encoding == encoding)
891    return EINVAL;
892
893  /* we can't encode a block that's already encoded */
894
895  if (block->encoding == EN_BASE64 || block->encoding == EN_QUOTED_PRINTABLE)
896    return EFAILURE;
897
898  if (encoding == EN_BASE64) {
899    char *encoded = _ds_encode_base64 (block->body->data);
900    buffer_destroy (block->body);
901    block->body = buffer_create (encoded);
902    free (encoded);
903    block->encoding = EN_BASE64;
904  }
905  else if (encoding == EN_QUOTED_PRINTABLE) {
906
907    /* TODO */
908
909    return 0;
910  }
911
912  LOGDEBUG("unsupported encoding: %d", encoding);
913  return 0;
914}
915
916/*
917 * _ds_encode_{base64,quoted}
918 *
919 * DESCRIPTION
920 *   supporting block encoder functions
921 *   these function call (or perform) specific encoding functions
922 *
923 * INPUT ARGUMENTS
924 *   body        decoded message body
925 *
926 * RETURN VALUES
927 *   a pointer to the allocated character array containing the encoded body
928 */
929
/*
 * Thin wrapper around base64encode().  A NULL body yields NULL, mirroring
 * _ds_decode_base64.
 */
char *
_ds_encode_base64 (const char *body)
{
  if (body == NULL)
    return NULL;

  return base64encode (body);
}
935
936/*
937 * _ds_assemble_message (ds_message_t message, const char *newline)
938 *
939 * DESCRIPTION
940 *   assembles a message structure into a flat text message
941 *
942 * INPUT ARGUMENTS
943 *      message    the message structure (ds_message_t) to assemble
944 *
945 * RETURN VALUES
946 *   a pointer to the allocated character array containing the text message
947 */
948
949char *
950_ds_assemble_message (ds_message_t message, const char *newline)
951{
952  buffer *out = buffer_create (NULL);
953  struct nt_node *node_nt, *node_header;
954  struct nt_c c_nt, c_nt2;
955  char *heading;
956  char *copyback;
957#ifdef VERBOSE
958  int i = 0;
959#endif
960
961  if (!out) {
962    LOG (LOG_CRIT, ERR_MEM_ALLOC);
963    return NULL;
964  }
965
966  node_nt = c_nt_first (message->components, &c_nt);
967  while (node_nt != NULL && node_nt->ptr != NULL)
968  {
969    ds_message_part_t block =
970      (ds_message_part_t) node_nt->ptr;
971#ifdef VERBOSE
972    LOGDEBUG ("assembling component %d", i);
973#endif
974
975    /* Assemble headers */
976
977    if (block->headers != NULL && block->headers->items > 0)
978    {
979      node_header = c_nt_first (block->headers, &c_nt2);
980      while (node_header != NULL)
981      {
982        char *data;
983        ds_header_t current_header =
984          (ds_header_t) node_header->ptr;
985
986        data = (current_header->original_data == NULL) ? current_header->data :
987               current_header->original_data;
988
989        heading = malloc(
990            ((current_header->heading) ? strlen(current_header->heading) : 0)
991          + ((data) ? strlen(data) : 0)
992          + 3 + strlen(newline));
993
994        if (current_header->heading != NULL &&
995            (!strncmp (current_header->heading, "From ", 5) ||
996             !strncmp (current_header->heading, "--", 2)))
997          sprintf (heading, "%s:%s%s",
998            (current_header->heading) ? current_header->heading : "",
999            (data) ? data : "", newline);
1000        else
1001          sprintf (heading, "%s: %s%s",
1002            (current_header->heading) ? current_header->heading : "",
1003            (data) ? data : "", newline);
1004
1005        buffer_cat (out, heading);
1006        free(heading);
1007        node_header = c_nt_next (block->headers, &c_nt2);
1008      }
1009    }
1010
1011    buffer_cat (out, newline);
1012
1013    /* Assemble bodies */
1014
1015    if (block->original_signed_body != NULL && message->protect)
1016      buffer_cat (out, block->original_signed_body->data);
1017    else
1018      buffer_cat (out, block->body->data);
1019
1020    if (block->terminating_boundary != NULL)
1021    {
1022      buffer_cat (out, "--");
1023      buffer_cat (out, block->terminating_boundary);
1024    }
1025
1026    node_nt = c_nt_next (message->components, &c_nt);
1027#ifdef VERBOSE
1028    i++;
1029#endif
1030
1031    if (node_nt != NULL && node_nt->ptr != NULL)
1032      buffer_cat (out, newline);
1033  }
1034
1035  copyback = out->data;
1036  out->data = NULL;
1037  buffer_destroy (out);
1038  return copyback;
1039}
1040
1041/*
1042 * _ds_{push,pop,match,extract}_boundary
1043 *
1044 * DESCRIPTION
1045 *   these functions maintain and service a boundary "stack" on the message
1046 */
1047
1048int
1049_ds_push_boundary (struct nt *stack, const char *boundary)
1050{
1051  char *y;
1052
1053  if (boundary == NULL || boundary[0] == 0)
1054    return EINVAL;
1055
1056  y = malloc (strlen (boundary) + 3);
1057  if (y == NULL)
1058    return EUNKNOWN;
1059
1060  sprintf (y, "--%s", boundary);
1061  nt_add (stack, (char *) y);
1062  free(y);
1063
1064  return 0;
1065}
1066
1067char *
1068_ds_pop_boundary (struct nt *stack)
1069{
1070  struct nt_node *node, *last_node = NULL, *parent_node = NULL;
1071  struct nt_c c;
1072  char *boundary = NULL;
1073
1074  node = c_nt_first (stack, &c);
1075  while (node != NULL)
1076  {
1077    parent_node = last_node;
1078    last_node = node;
1079    node = c_nt_next (stack, &c);
1080  }
1081  if (parent_node != NULL)
1082    parent_node->next = NULL;
1083  else
1084    stack->first = NULL;
1085
1086  if (last_node == NULL)
1087    return NULL;
1088
1089  boundary = strdup (last_node->ptr);
1090
1091  free (last_node->ptr);
1092  free (last_node);
1093
1094  return boundary;
1095}
1096
1097int
1098_ds_match_boundary (struct nt *stack, const char *buff)
1099{
1100  struct nt_node *node;
1101  struct nt_c c;
1102
1103  node = c_nt_first (stack, &c);
1104  while (node != NULL)
1105  {
1106    if (!strncmp (buff, node->ptr, strlen (node->ptr)))
1107    {
1108      return 1;
1109    }
1110    node = c_nt_next (stack, &c);
1111  }
1112  return 0;
1113}
1114
1115int
1116_ds_extract_boundary (char *buf, size_t size, char *mem)
1117{
1118  char *data, *ptr, *ptrptr;
1119
1120  if (mem == NULL)
1121    return EINVAL;
1122
1123  data = strdup(mem);
1124  if (data == NULL) {
1125    LOG(LOG_CRIT, ERR_MEM_ALLOC);
1126    return EUNKNOWN;
1127  }
1128
1129  for(ptr=data;ptr<(data+strlen(data));ptr++) {
1130    if (!strncasecmp(ptr, "boundary", 8)) {
1131      ptr = strchr(ptr, '=');
1132      if (ptr == NULL) {
1133        free(data);
1134        return EFAILURE;
1135      }
1136      ptr++;
1137      while(isspace((int) ptr[0]))
1138        ptr++;
1139      if (ptr[0] == '"')
1140        ptr++;
1141      strtok_r(ptr, " \";\n\t", &ptrptr);
1142      strlcpy(buf, ptr, size);
1143      free(data);
1144      return 0;
1145    }
1146  }
1147
1148  free(data);
1149  return EFAILURE;
1150}
1151
1152/*
1153 * _ds_find_header (ds_message_t message, const char *heading) {
1154 *
1155 * DESCRIPTION
1156 *   finds a header and returns its value
1157 *
1158 * INPUT ARGUMENTS
1159 *   message     the message structure to search
1160 *   heading    the heading to search for
1161 *   flags      optional search flags
1162 *
1163 * RETURN VALUES
1164 *   a pointer to the header structure's value
1165 *
1166 */
1167
1168char *
1169_ds_find_header (ds_message_t message, const char *heading) {
1170  ds_message_part_t block;
1171  ds_header_t head;
1172  struct nt_node *node_nt;
1173
1174  if (message->components->first) {
1175    if ((block = message->components->first->ptr)==NULL)
1176      return NULL;
1177    if (block->headers == NULL)
1178      return NULL;
1179  } else {
1180    return NULL;
1181  }
1182
1183  node_nt = block->headers->first;
1184  while(node_nt != NULL) {
1185    head = (ds_header_t) node_nt->ptr;
1186    if (head && !strcasecmp(head->heading, heading)) {
1187      return head->data;
1188    }
1189    node_nt = node_nt->next;
1190  }
1191
1192  return NULL;
1193}
1194
/*
 * _ds_hex2dec (unsigned char hex)
 *
 * DESCRIPTION
 *   converts a single hexadecimal digit character to its numeric value
 *
 * INPUT ARGUMENTS
 *   hex    character to convert ('0'-'9', 'a'-'f' or 'A'-'F')
 *
 * RETURN VALUES
 *   the value 0-15, or -1 if hex is not a hexadecimal digit
 */

int _ds_hex2dec(unsigned char hex) {
  if (hex >= '0' && hex <= '9')
    return hex - '0';

  if (hex >= 'a' && hex <= 'f')
    return hex - 'a' + 10;

  if (hex >= 'A' && hex <= 'F')
    return hex - 'A' + 10;

  return -1;
}
1216
1217/*
1218 * _ds_strip_html(const char *html)
1219 *
1220 * DESCRIPTION
1221 *    strip html tags from the supplied message
1222 *
1223 * INPUT ARGUMENTS
1224 *     html encoded message body
1225 *
1226 * RETURN VALUES
1227 *   a pointer to the allocated character array containing the
1228 *   stripped message
1229 *
1230 */
1231
1232char *
1233_ds_strip_html (const char *html)
1234{
1235#ifdef VERBOSE
1236  LOGDEBUG("stripping HTML tags from message block");
1237#endif
1238  size_t j = 0, k = 0, i = 0;
1239  int visible = 1;
1240  int closing_td_tag = 0;
1241  char *html2;
1242  const char *cdata_close_tag = NULL;
1243
1244  if(!html)
1245    return NULL;
1246
1247  static struct {
1248    unsigned int id;
1249    char *entity;
1250  }
1251  charset[] = {
1252    {   32, "&nbsp;"    }, {  34, "&quot;"    }, {  34, "&quot;"    }, {  38, "&amp;"     },
1253    {   38, "&amp;"     }, {  39, "&apos;"    }, {  60, "&lt;"      }, {  60, "&lt;"      },
1254    {   62, "&gt;"      }, {  62, "&gt;"      }, { 160, "&nbsp;"    }, { 161, "&iexcl;"   },
1255    {  162, "&cent;"    }, { 163, "&pound;"   }, { 164, "&curren;"  }, { 165, "&yen;"     },
1256    {  166, "&brvbar;"  }, { 167, "&sect;"    }, { 168, "&uml;"     }, { 169, "&copy;"    },
1257    {  170, "&ordf;"    }, { 171, "&laquo;"   }, { 172, "&not;"     }, { 173, "&shy;"     },
1258    {  174, "&reg;"     }, { 175, "&macr;"    }, { 176, "&deg;"     }, { 177, "&plusmn;"  },
1259    {  178, "&sup2;"    }, { 179, "&sup3;"    }, { 180, "&acute;"   }, { 181, "&micro;"   },
1260    {  182, "&para;"    }, { 183, "&middot;"  }, { 184, "&cedil;"   }, { 185, "&sup1;"    },
1261    {  186, "&ordm;"    }, { 187, "&raquo;"   }, { 188, "&frac14;"  }, { 189, "&frac12;"  },
1262    {  190, "&frac34;"  }, { 191, "&iquest;"  }, { 192, "&Agrave;"  }, { 193, "&Aacute;"  },
1263    {  194, "&Acirc;"   }, { 195, "&Atilde;"  }, { 196, "&Auml;"    }, { 197, "&Aring;"   },
1264    {  198, "&AElig;"   }, { 199, "&Ccedil;"  }, { 200, "&Egrave;"  }, { 201, "&Eacute;"  },
1265    {  202, "&Ecirc;"   }, { 203, "&Euml;"    }, { 204, "&Igrave;"  }, { 205, "&Iacute;"  },
1266    {  206, "&Icirc;"   }, { 207, "&Iuml;"    }, { 208, "&ETH;"     }, { 209, "&Ntilde;"  },
1267    {  210, "&Ograve;"  }, { 211, "&Oacute;"  }, { 212, "&Ocirc;"   }, { 213, "&Otilde;"  },
1268    {  214, "&Ouml;"    }, { 215, "&times;"   }, { 216, "&Oslash;"  }, { 217, "&Ugrave;"  },
1269    {  218, "&Uacute;"  }, { 219, "&Ucirc;"   }, { 220, "&Uuml;"    }, { 221, "&Yacute;"  },
1270    {  222, "&THORN;"   }, { 223, "&szlig;"   }, { 224, "&agrave;"  }, { 225, "&aacute;"  },
1271    {  226, "&acirc;"   }, { 227, "&atilde;"  }, { 228, "&auml;"    }, { 229, "&aring;"   },
1272    {  230, "&aelig;"   }, { 231, "&ccedil;"  }, { 232, "&egrave;"  }, { 233, "&eacute;"  },
1273    {  234, "&ecirc;"   }, { 235, "&euml;"    }, { 236, "&igrave;"  }, { 237, "&iacute;"  },
1274    {  238, "&icirc;"   }, { 239, "&iuml;"    }, { 240, "&eth;"     }, { 241, "&ntilde;"  },
1275    {  242, "&ograve;"  }, { 243, "&oacute;"  }, { 244, "&ocirc;"   }, { 245, "&otilde;"  },
1276    {  246, "&ouml;"    }, { 247, "&divide;"  }, { 248, "&oslash;"  }, { 249, "&ugrave;"  },
1277    {  250, "&uacute;"  }, { 251, "&ucirc;"   }, { 252, "&uuml;"    }, { 253, "&yacute;"  },
1278    {  254, "&thorn;"   }, { 255, "&yuml;"    }, { 338, "&OElig;"   }, { 339, "&oelig;"   },
1279    {  352, "&Scaron;"  }, { 353, "&scaron;"  }, { 376, "&Yuml;"    }, { 402, "&fnof;"    },
1280    {  710, "&circ;"    }, { 732, "&tilde;"   }, { 913, "&Alpha;"   }, { 914, "&Beta;"    },
1281    {  915, "&Gamma;"   }, { 916, "&Delta;"   }, { 917, "&Epsilon;" }, { 918, "&Zeta;"    },
1282    {  919, "&Eta;"     }, { 920, "&Theta;"   }, { 921, "&Iota;"    }, { 922, "&Kappa;"   },
1283    {  923, "&Lambda;"  }, { 924, "&Mu;"      }, { 925, "&Nu;"      }, { 926, "&Xi;"      },
1284    {  927, "&Omicron;" }, { 928, "&Pi;"      }, { 929, "&Rho;"     }, { 931, "&Sigma;"   },
1285    {  932, "&Tau;"     }, { 933, "&Upsilon;" }, { 934, "&Phi;"     }, { 935, "&Chi;"     },
1286    {  936, "&Psi;"     }, { 937, "&Omega;"   }, { 945, "&alpha;"   }, { 946, "&beta;"    },
1287    {  947, "&gamma;"   }, { 948, "&delta;"   }, { 949, "&epsilon;" }, { 950, "&zeta;"    },
1288    {  951, "&eta;"     }, { 952, "&theta;"   }, { 953, "&iota;"    }, { 954, "&kappa;"   },
1289    {  955, "&lambda;"  }, { 956, "&mu;"      }, { 957, "&nu;"      }, { 958, "&xi;"      },
1290    {  959, "&omicron;" }, { 960, "&pi;"      }, { 961, "&rho;"     }, { 962, "&sigmaf;"  },
1291    {  963, "&sigma;"   }, { 964, "&tau;"     }, { 965, "&upsilon;" }, { 966, "&phi;"     },
1292    {  967, "&chi;"     }, { 968, "&psi;"     }, { 969, "&omega;"   }, { 977, "&thetasym" },
1293    {  978, "&upsih;"   }, { 982, "&piv;"     }, {8194, "&ensp;"    }, {8195, "&emsp;"    },
1294    { 8201, "&thinsp;"  }, {8204, "&zwnj;"    }, {8205, "&zwj;"     }, {8206, "&lrm;"     },
1295    { 8207, "&rlm;"     }, {8211, "&ndash;"   }, {8212, "&mdash;"   }, {8216, "&lsquo;"   },
1296    { 8217, "&rsquo;"   }, {8218, "&sbquo;"   }, {8220, "&ldquo;"   }, {8221, "&rdquo;"   },
1297    { 8222, "&bdquo;"   }, {8224, "&dagger;"  }, {8225, "&Dagger;"  }, {8226, "&bull;"    },
1298    { 8230, "&hellip;"  }, {8240, "&permil;"  }, {8242, "&prime;"   }, {8243, "&Prime;"   },
1299    { 8249, "&lsaquo;"  }, {8250, "&rsaquo;"  }, {8254, "&oline;"   }, {8260, "&frasl;"   },
1300    { 8364, "&euro;"    }, {8465, "&image;"   }, {8472, "&weierp;"  }, {8476, "&real;"    },
1301    { 8482, "&trade;"   }, {8501, "&alefsym;" }, {8592, "&larr;"    }, {8593, "&uarr;"    },
1302    { 8594, "&rarr;"    }, {8595, "&darr;"    }, {8596, "&harr;"    }, {8629, "&crarr;"   },
1303    { 8656, "&lArr;"    }, {8657, "&uArr;"    }, {8658, "&rArr;"    }, {8659, "&dArr;"    },
1304    { 8660, "&hArr;"    }, {8704, "&forall;"  }, {8706, "&part;"    }, {8707, "&exist;"   },
1305    { 8709, "&empty;"   }, {8711, "&nabla;"   }, {8712, "&isin;"    }, {8713, "&notin;"   },
1306    { 8715, "&ni;"      }, {8719, "&prod;"    }, {8721, "&sum;"     }, {8722, "&minus;"   },
1307    { 8727, "&lowast;"  }, {8730, "&radic;"   }, {8733, "&prop;"    }, {8734, "&infin;"   },
1308    { 8736, "&ang;"     }, {8743, "&and;"     }, {8744, "&or;"      }, {8745, "&cap;"     },
1309    { 8746, "&cup;"     }, {8747, "&int;"     }, {8756, "&there4;"  }, {8764, "&sim;"     },
1310    { 8773, "&cong;"    }, {8776, "&asymp;"   }, {8800, "&ne;"      }, {8801, "&equiv;"   },
1311    { 8804, "&le;"      }, {8805, "&ge;"      }, {8834, "&sub;"     }, {8835, "&sup;"     },
1312    { 8836, "&nsub;"    }, {8838, "&sube;"    }, {8839, "&supe;"    }, {8853, "&oplus;"   },
1313    { 8855, "&otimes;"  }, {8869, "&perp;"    }, {8901, "&sdot;"    }, {8968, "&lceil;"   },
1314    { 8969, "&rceil;"   }, {8970, "&lfloor;"  }, {8971, "&rfloor;"  }, {9001, "&lang;"    },
1315    { 9002, "&rang;"    }, {9674, "&loz;"     }, {9824, "&spades;"  }, {9827, "&clubs;"   },
1316    { 9829, "&hearts;"  }, {9830, "&diams;"   }
1317  };
1318  int num_chars = sizeof(charset) / sizeof(charset[0]);
1319
1320  static struct {
1321    char *open_tag;
1322    char *uri_tag;
1323  }
1324  uritag[] = {
1325    {          "<a", "href"       }, {        "<img", "src"        }, {      "<input", "src"        },
1326    {     "<iframe", "src"        }, {      "<frame", "src"        }, {     "<script", "src"        },
1327    {       "<form", "action"     }, {      "<embed", "src"        }, {       "<area", "href"       },
1328    {       "<base", "href"       }, {       "<link", "href"       }, {     "<source", "src"        },
1329    {       "<body", "background" }, { "<blockquote", "cite"       }, {          "<q", "cite"       },
1330    {        "<ins", "cite"       }, {        "<del", "cite"       }
1331  };
1332  int num_uri = sizeof(uritag) / sizeof(uritag[0]);
1333
1334  size_t len = strlen(html);
1335  html2 = malloc(len+1);
1336
1337  if (html2 == NULL) {
1338    LOG (LOG_CRIT, ERR_MEM_ALLOC);
1339    return NULL;
1340  }
1341
1342  for (i = 0; i < len; i++) {
1343    if (html[i] == '<') {
1344      if (cdata_close_tag) {
1345        if (strncasecmp(html + i, cdata_close_tag, strlen(cdata_close_tag)) == 0) {
1346          i += strlen(cdata_close_tag) - 1;
1347          cdata_close_tag = NULL;
1348        }
1349        continue;
1350      } else if (strncasecmp(html + i, "</td>", 5) == 0) {
1351        i += 4;
1352        closing_td_tag = 1;
1353        continue;
1354      } else if (strncasecmp(html + i, "<td", 3) == 0 && closing_td_tag) {
1355        if (j > 0 && !isspace(html2[j-1])) {
1356          html2[j++]=' ';
1357        }
1358        visible = 0;
1359      } else {
1360        closing_td_tag = 0;
1361        visible = 1;
1362      }
1363      k = i + 1;
1364
1365      if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) ||
1366                           (html[k] >= 97 && html[k] <= 122) ||
1367                           (html[k] == 47) ||
1368                           (html[k] == 33) ))) {
1369        /* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */
1370        visible = 1;
1371        html2[j++]=html[i];
1372        i = k;
1373        const char *w = &(html[k]);
1374        while (j < len && (size_t)(w - html) < len && *w != '<') {
1375          html2[j++]=*w;
1376          w++;
1377          i++;
1378        }
1379        continue;
1380      } else if (html[k]) {
1381        /* find the end of the tag */
1382        while (k < len && html[k] != '<' && html[k] != '>') {k++;}
1383
1384        /* if we've got a tag with a uri, save the address to print later. */
1385        char *url_tag = " ";
1386        int tag_offset = 0, x = 0, y = 0;
1387        for (y = 0; y < num_uri; y++) {
1388          x = strlen(uritag[y].open_tag);
1389          if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) {
1390            url_tag = uritag[y].uri_tag;
1391            tag_offset = i + x + 1;
1392            break;
1393          }
1394        }
1395        /* tag with uri found */
1396        if (tag_offset > 0) {
1397          size_t url_start;         /* start of url tag inclusive [ */
1398          size_t url_tag_len = strlen(url_tag);
1399          char delim = ' ';
1400          /* find start of uri */
1401          for (url_start = tag_offset; url_start <= k; url_start++) {
1402            if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) {
1403              url_start += url_tag_len;
1404              while (html[url_start] && isspace(html[url_start])) {url_start++;}   /* remove spaces before = */
1405              if (html[url_start] == '=') {
1406                url_start++;
1407                while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */
1408                if (html[url_start] == '"') {
1409                  delim = '"';
1410                  url_start++;
1411                } else if (html[url_start] == '\'') {
1412                  delim = '\'';
1413                  url_start++;
1414                } else {
1415                  delim = '>';
1416                }
1417                break;
1418              } else {
1419                /* Start of uri tag found but no '=' after the tag.
1420                 * Skip the whole tag.
1421                 */
1422                break;
1423              }
1424            } else if ((url_start - tag_offset) >= 50) {
1425              /* The length of the html tag is over 50 characters long without
1426               * finding the start of the url/uri. Skip the whole tag.
1427               */
1428              break;
1429            }
1430          }
1431          /* find end of uri */
1432          if (delim != ' ') {
1433            if (url_start < len &&
1434                (strncasecmp(html + url_start, "http:", 5) == 0 ||
1435                 strncasecmp(html + url_start, "https:", 6) == 0 ||
1436                 strncasecmp(html + url_start, "ftp:", 4) == 0)) {
1437              html2[j++]=' ';
1438              const char *w = &(html[url_start]);
1439              /* html2 is a buffer of len + 1, where the +1 is for NULL
1440               * termination. This means we only want to loop to len
1441               * since we will replace html2[j] right after the loop.
1442               */
1443              while (j < len && (size_t)(w - html) < len && *w != delim) {
1444                html2[j++]=*w;
1445                w++;
1446              }
1447              html2[j++]=' ';
1448            }
1449          }
1450        } else if (strncasecmp(html + i, "<p>", 3) == 0
1451                || strncasecmp(html + i, "<p ", 3) == 0
1452                || strncasecmp(html + i, "<p\t", 3) == 0
1453                || strncasecmp(html + i, "<tr", 3) == 0
1454                || strncasecmp(html + i, "<option", 7) == 0
1455                || strncasecmp(html + i, "<br", 3) == 0
1456                || strncasecmp(html + i, "<li", 3) == 0
1457                || strncasecmp(html + i, "<div", 4) == 0
1458                || strncasecmp(html + i, "</select>", 9) == 0
1459                || strncasecmp(html + i, "</table>", 8) == 0) {
1460          if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') {
1461            html2[j++] = '\n';
1462          }
1463        } else if (strncasecmp(html + i, "<applet", 7) == 0) {
1464          cdata_close_tag = "</applet>";
1465        } else if (strncasecmp(html + i, "<embed", 6) == 0) {
1466          cdata_close_tag = "</embed>";
1467        } else if (strncasecmp(html + i, "<frameset", 9) == 0) {
1468          cdata_close_tag = "</frameset>";
1469        } else if (strncasecmp(html + i, "<frame", 6) == 0) {
1470          cdata_close_tag = "</frame>";
1471        } else if (strncasecmp(html + i, "<iframe", 7) == 0) {
1472          cdata_close_tag = "</iframe>";
1473        } else if (strncasecmp(html + i, "<noembed", 8) == 0) {
1474          cdata_close_tag = "</noembed>";
1475        } else if (strncasecmp(html + i, "<noscript", 9) == 0) {
1476          cdata_close_tag = "</noscript>";
1477        } else if (strncasecmp(html + i, "<object", 7) == 0) {
1478          cdata_close_tag = "</object>";
1479        } else if (strncasecmp(html + i, "<script", 7) == 0) {
1480          cdata_close_tag = "</script>";
1481        } else if (strncasecmp(html + i, "<style", 6) == 0) {
1482          cdata_close_tag = "</style>";
1483        }
1484        i = (html[k] == '<' || html[k] == '\0')? k - 1: k;
1485        continue;
1486      }
1487    } else if (cdata_close_tag) {
1488      continue;
1489    } else if (!isspace(html[i])) {
1490      visible = 1;
1491    }
1492
1493    if (strncmp(html+i,"&#",2)==0) {
1494      int x = 0;
1495      const char *w = &(html[i+2]);
1496      while (*w == '0') {i++;w++;}
1497      char n[5];
1498      if (html[i+4] && html[i+4] == ';'
1499          && isdigit(html[i+2])
1500          && isdigit(html[i+3])) {
1501        n[0] = html[i+2];
1502        n[1] = html[i+3];
1503        n[2] = 0;
1504        x = atoi(n);
1505        if (x <= 255 && x >= 32)
1506          html2[j++] = x;
1507        i += 4;
1508      } else if (html[i+6]
1509                  && html[i+6] == ';'
1510                  && isdigit(html[i+2])
1511                  && isdigit(html[i+3])
1512                  && isdigit(html[i+4])
1513                  && isdigit(html[i+5])) {
1514        n[0] = html[i+2];
1515        n[1] = html[i+3];
1516        n[2] = html[i+4];
1517        n[3] = html[i+5];
1518        n[4] = 0;
1519        x = atoi(n);
1520        if (x <= 255 && x >= 32)
1521          html2[j++] = x;
1522        i += 6;
1523      } else {
1524        const char *w = &(html[i]);
1525        while (*w != ';' && *w != ' ' && *w != '\t' && *w != '\0') {i++;w++;}
1526      }
1527      visible = 0;
1528      continue;
1529    } else if (html[i] == '&') {
1530      int x = 0, y = 0;
1531      for (y = 0; y < num_chars; y++) {
1532        x = strlen(charset[y].entity);
1533        if (strncasecmp(html+i,charset[y].entity,x)==0) {
1534          if (charset[y].id <= 255)
1535            html2[j++] = charset[y].id;
1536          i += x-1;
1537          visible = 0;
1538          continue;
1539        }
1540      }
1541    }
1542
1543    if (j < len && visible)
1544      html2[j++] = html[i];
1545
1546    if (j >= len)
1547      i = j = len;
1548  }
1549
1550  html2[j] = '\0';
1551  return (char *)html2;
1552}
Note: See TracBrowser for help on using the repository browser.