Context Navigation

source: npl/mailserver/dspam/dspam-3.10.2/src/decode.c @ c5c522c

gcc484ntopperl-5.22

Last change on this file since c5c522c was c5c522c, checked in by Edwin Eefting <edwin@datux.nl>, 8 years ago
initial commit, transferred from cleaned syn3 svn tree
Property mode set to `100644`
File size: 43.1 KB

Line
1	/* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */
2
3	/*
4	DSPAM
5	COPYRIGHT (C) 2002-2012 DSPAM PROJECT
6
7	This program is free software: you can redistribute it and/or modify
8	it under the terms of the GNU Affero General Public License as
9	published by the Free Software Foundation, either version 3 of the
10	License, or (at your option) any later version.
11
12	This program is distributed in the hope that it will be useful,
13	but WITHOUT ANY WARRANTY; without even the implied warranty of
14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	GNU Affero General Public License for more details.
16
17	You should have received a copy of the GNU Affero General Public License
18	along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20	*/
21
22	/*
23	* decode.c - message decoding and parsing
24	*
25	* DESCRIPTION
26	* This set of functions performs parsing and decoding of a message and
27	* embeds its components into a ds_message_t structure, suitable for
28	* logical access.
29	*/
30
31	#ifdef HAVE_CONFIG_H
32	#include <auto-config.h>
33	#endif
34
35	#include <stdio.h>
36	#include <string.h>
37	#include <stdlib.h>
38	#include <ctype.h>
39
40	#include "decode.h"
41	#include "error.h"
42	#include "util.h"
43	#include "language.h"
44	#include "buffer.h"
45	#include "base64.h"
46	#include "libdspam.h"
47
48	/*
49	* _ds_actualize_message (const char *message)
50	*
51	* DESCRIPTION
52	* primary message parser
53	*
54	* this function performs all decoding and actualization of the message
55	* into the message structures defined in the .h
56	*
57	* INPUT ARGUMENTS
58	* message message to decode
59	*
60	* RETURN VALUES
61	* pointer to an allocated message structure (ds_message_t), NULL on failure
62	*/
63
64	ds_message_t
65	_ds_actualize_message (const char *message)
66	{
67	char *line = NULL;
68	char *in = NULL;
69	char *m_in = NULL;
70	ds_message_part_t current_block;
71	ds_header_t current_heading = NULL;
72	struct nt *boundaries = NULL;
73	ds_message_t out = NULL;
74	int block_position = BP_HEADER;
75	int in_content = 0;
76
77	if (!message \|\| !(*message))
78	goto MEMFAIL;
79
80	if (!(in = strdup(message)))
81	goto MEMFAIL;
82
83	m_in = in;
84
85	boundaries = nt_create (NT_CHAR);
86	if (!boundaries)
87	goto MEMFAIL;
88
89	out = (ds_message_t) calloc (1, sizeof (struct _ds_message));
90	if (!out)
91	goto MEMFAIL;
92
93	out->components = nt_create (NT_PTR);
94	if (!out->components)
95	goto MEMFAIL;
96
97	current_block = _ds_create_message_part ();
98	if (!current_block)
99	goto MEMFAIL;
100
101	if (nt_add (out->components, (void *) current_block) == NULL)
102	goto MEMFAIL;
103
104	/* Read the message from memory */
105
106	line = strsep (&in, "\n");
107	while (line)
108	{
109
110	/* Header processing */
111
112	if (block_position == BP_HEADER)
113	{
114
115	/* If we see two boundaries converged on top of one another */
116
117	if (_ds_match_boundary (boundaries, line))
118	{
119
120	/* Add the boundary as the terminating boundary */
121
122	current_block->terminating_boundary = strdup (line + 2);
123	current_block->original_encoding = current_block->encoding;
124
125	_ds_decode_headers(current_block);
126	current_block = _ds_create_message_part ();
127
128	if (!current_block)
129	goto MEMFAIL;
130
131	if (nt_add (out->components, (void *) current_block) == NULL)
132	goto MEMFAIL;
133
134	block_position = BP_HEADER;
135	}
136
137	/* Concatenate multiline headers to the original header field data */
138
139	else if (line[0] == 32 \|\| line[0] == '\t')
140	{
141	if (current_heading)
142	{
143	char eow, ptr;
144
145	ptr = realloc (current_heading->data,
146	strlen (current_heading->data) + strlen (line) + 2);
147	if (ptr)
148	{
149	current_heading->data = ptr;
150	strcat (current_heading->data, "\n");
151	strcat (current_heading->data, line);
152	} else {
153	goto MEMFAIL;
154	}
155
156	/* Our concatenated data doesn't have any whitespace between lines */
157	for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { }
158
159	ptr =
160	realloc (current_heading->concatenated_data,
161	strlen (current_heading->concatenated_data) + strlen (eow) + 1);
162	if (ptr)
163	{
164	current_heading->concatenated_data = ptr;
165	strcat (current_heading->concatenated_data, eow);
166	} else {
167	goto MEMFAIL;
168	}
169
170	if (current_heading->original_data) {
171	ptr =
172	realloc (current_heading->original_data,
173	strlen (current_heading->original_data) +
174	strlen (line) + 2);
175	if (ptr) {
176	current_heading->original_data = ptr;
177	strcat (current_heading->original_data, "\n");
178	strcat (current_heading->original_data, line);
179	} else {
180	goto MEMFAIL;
181	}
182	}
183
184	_ds_analyze_header (current_block, current_heading, boundaries);
185	}
186	}
187
188	/* New header field when LF or CRLF is not found */
189
190	else if (line[0] != 0 && line[0] != 13)
191	{
192	ds_header_t header = _ds_create_header_field (line);
193
194	if (header != NULL)
195	{
196	_ds_analyze_header (current_block, header, boundaries);
197	current_heading = header;
198	nt_add (current_block->headers, header);
199	}
200
201
202	/* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */
203
204	} else {
205	block_position = BP_BODY;
206	}
207	}
208
209	/* Body processing */
210
211	else if (block_position == BP_BODY)
212	{
213	/* Look for a boundary in the header of a part */
214
215	if (!strncasecmp (line, "Content-Type", 12)
216	\|\| ((line[0] == 32 \|\| line[0] == 9) && in_content))
217	{
218	char boundary[128];
219	in_content = 1;
220	if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) {
221	if (!_ds_match_boundary (boundaries, boundary)) {
222	_ds_push_boundary (boundaries, boundary);
223	free(current_block->boundary);
224	current_block->boundary = strdup (boundary);
225	}
226	} else {
227	_ds_push_boundary (boundaries, "");
228	}
229	} else {
230	in_content = 0;
231	}
232
233	/* Multipart boundary was reached; move onto next block */
234
235	if (_ds_match_boundary (boundaries, line))
236	{
237
238	/* Add the boundary as the terminating boundary */
239
240	current_block->terminating_boundary = strdup (line + 2);
241	current_block->original_encoding = current_block->encoding;
242
243	_ds_decode_headers(current_block);
244	current_block = _ds_create_message_part ();
245
246	if (!current_block)
247	goto MEMFAIL;
248
249	if (nt_add (out->components, (void *) current_block) == NULL)
250	goto MEMFAIL;
251
252	block_position = BP_HEADER;
253	}
254
255	/* Plain old message (or part) body */
256
257	else {
258	buffer_cat (current_block->body, line);
259
260	/* Don't add extra \n at the end of message's body */
261
262	if (in != NULL)
263	buffer_cat (current_block->body, "\n");
264	}
265	}
266
267	line = strsep (&in, "\n");
268	} /* while (line) */
269
270	_ds_decode_headers(current_block);
271
272	free (m_in);
273	nt_destroy (boundaries);
274	return out;
275
276	MEMFAIL:
277	if (m_in) free(m_in);
278	if (boundaries) nt_destroy (boundaries);
279	if (out) _ds_destroy_message(out);
280	LOG (LOG_CRIT, ERR_MEM_ALLOC);
281	return NULL;
282	}
283
284	/*
285	* _ds_create_message_part
286	*
287	* DESCRIPTION
288	* create and initialize a new message block component
289	*
290	* RETURN VALUES
291	* pointer to an allocated message block (ds_message_part_t), NULL on failure
292	*
293	*/
294
295	ds_message_part_t
296	_ds_create_message_part (void)
297	{
298	ds_message_part_t block =
299	(ds_message_part_t) calloc (1, sizeof (struct _ds_message_part));
300
301	if (!block)
302	goto MEMFAIL;
303
304	block->headers = nt_create (NT_PTR);
305	if (!block->headers)
306	goto MEMFAIL;
307
308	block->body = buffer_create (NULL);
309	if (!block->body)
310	goto MEMFAIL;
311
312	block->encoding = EN_UNKNOWN;
313	block->media_type = MT_TEXT;
314	block->media_subtype = MST_PLAIN;
315	block->original_encoding = EN_UNKNOWN;
316	block->content_disposition = PCD_UNKNOWN;
317
318	/* Not really necessary, but.. */
319
320	block->boundary = NULL;
321	block->terminating_boundary = NULL;
322	block->original_signed_body = NULL;
323
324
325	return block;
326
327	MEMFAIL:
328	if (block) {
329	buffer_destroy(block->body);
330	nt_destroy(block->headers);
331	free(block);
332	}
333	LOG (LOG_CRIT, ERR_MEM_ALLOC);
334	return NULL;
335	}
336
337	/*
338	* _ds_create_header_field(const char *heading)
339	*
340	* DESCRIPTION
341	* create and initialize a new header structure
342	*
343	* INPUT ARGUMENTS
344	* heading plain text heading (e.g. "To: Mom")
345	*
346	* RETURN VALUES
347	* pointer to an allocated header structure (ds_header_t), NULL on failure
348	*/
349
350	ds_header_t
351	_ds_create_header_field (const char *heading)
352	{
353	char *in = strdup(heading);
354	char ptr, m = in, *data;
355	ds_header_t header =
356	(ds_header_t) calloc (1, sizeof (struct _ds_header_field));
357
358	if (!header \|\| !in)
359	goto MEMFAIL;
360
361	ptr = strsep (&in, ":");
362	if (ptr) {
363	header->heading = strdup (ptr);
364	if (!header->heading)
365	goto MEMFAIL;
366	else
367	{
368	if (!in)
369	{
370	LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't "
371	"contains `:' character", __FILE__, __LINE__, header->heading);
372
373	/* Use empty string as data as fallback for comtinue processing. */
374
375	in = "";
376	}
377	else
378	{
379	/* Skip white space */
380	while (in == 32 \|\| in == 9)
381	++in;
382	}
383
384	data = strdup (in);
385	if (!data)
386	goto MEMFAIL;
387
388	header->data = data;
389	header->concatenated_data = strdup(data);
390	}
391	}
392
393	free (m);
394	return header;
395
396	MEMFAIL:
397	free(header);
398	free(m);
399	LOG (LOG_CRIT, ERR_MEM_ALLOC);
400	return NULL;
401	}
402
403	/*
404	* _ds_decode_headers (ds_message_part_t block)
405	*
406	* DESCRIPTION
407	* decodes in-line encoded headers
408	*
409	* RETURN VALUES
410	* returns 0 on success
411	*/
412
413	int
414	_ds_decode_headers (ds_message_part_t block) {
415	#ifdef VERBOSE
416	LOGDEBUG("decoding headers in message block");
417	#endif
418	char ptr, dptr, rest, enc;
419	ds_header_t header;
420	struct nt_node *node_nt;
421	struct nt_c c_nt;
422	long decoded_len;
423
424	node_nt = c_nt_first(block->headers, &c_nt);
425	while(node_nt != NULL) {
426	long enc_offset;
427	header = (ds_header_t) node_nt->ptr;
428
429	for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++)
430	{
431	enc = header->concatenated_data + enc_offset;
432
433	if (!strncmp(enc, "=?", 2)) {
434	int was_null = 0;
435	char ptrptr, decoded = NULL;
436	long offset = (long) enc - (long) header->concatenated_data;
437
438	if (header->original_data == NULL) {
439	header->original_data = strdup(header->data);
440	was_null = 1;
441	}
442
443	strtok_r (enc, "?", &ptrptr);
444	strtok_r (NULL, "?", &ptrptr);
445	ptr = strtok_r (NULL, "?", &ptrptr);
446	dptr = strtok_r (NULL, "?", &ptrptr);
447	if (!dptr) {
448	if (was_null && header->original_data != NULL)
449	free(header->original_data);
450	if (was_null)
451	header->original_data = NULL;
452	continue;
453	}
454
455	rest = dptr + strlen (dptr);
456	if (rest[0]!=0) {
457	rest++;
458	if (rest[0]!=0) rest++;
459	}
460
461	if (ptr != NULL && (ptr[0] == 'b' \|\| ptr[0] == 'B'))
462	decoded = _ds_decode_base64 (dptr);
463	else if (ptr != NULL && (ptr[0] == 'q' \|\| ptr[0] == 'Q'))
464	decoded = _ds_decode_quoted (dptr);
465
466	decoded_len = 0;
467
468	/* Append the rest of the message */
469
470	if (decoded)
471	{
472	char *new_alloc;
473
474	decoded_len = strlen(decoded);
475	new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2);
476	if (new_alloc == NULL) {
477	LOG (LOG_CRIT, ERR_MEM_ALLOC);
478	}
479	else
480	{
481	if (offset)
482	strncpy(new_alloc, header->concatenated_data, offset);
483
484	strcat(new_alloc, decoded);
485	strcat(new_alloc, rest);
486	free(decoded);
487	decoded = new_alloc;
488	}
489	}
490
491	if (decoded) {
492	enc_offset += (decoded_len-1);
493	free(header->concatenated_data);
494	header->concatenated_data = decoded;
495	}
496	else if (was_null && header->original_data) {
497	free(header->original_data);
498	header->original_data = NULL;
499	}
500	else if (was_null) {
501	header->original_data = NULL;
502	}
503	}
504	}
505
506	if (header->original_data != NULL) {
507	free(header->data);
508	header->data = strdup(header->concatenated_data);
509	}
510
511	node_nt = c_nt_next(block->headers, &c_nt);
512	}
513
514	return 0;
515	}
516
517	/*
518	* _ds_analyze_header (ds_message_part_t block, ds_header_t header,
519	* struct nt *boundaries)
520	*
521	* DESCRIPTION
522	* analyzes the header passed in and performs various operations including:
523	* - setting media type and subtype
524	* - setting transfer encoding
525	* - adding newly discovered boundaries
526	*
527	* based on the heading specified. essentially all headers should be
528	* analyzed for future expansion
529	*
530	* INPUT ARGUMENTS
531	* block the message block to which the header belongs
532	* header the header to analyze
533	* boundaries a list of known boundaries found within the block
534	*/
535
536	void
537	_ds_analyze_header (
538	ds_message_part_t block,
539	ds_header_t header,
540	struct nt *boundaries)
541	{
542	if (!header \|\| !block \|\| !header->data)
543	return;
544
545	/* Content-Type header */
546
547	if (!strcasecmp (header->heading, "Content-Type"))
548	{
549	int len = strlen(header->data);
550	if (!strncasecmp (header->data, "text", 4)) {
551	block->media_type = MT_TEXT;
552	if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5))
553	block->media_subtype = MST_PLAIN;
554	else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4))
555	block->media_subtype = MST_HTML;
556	else
557	block->media_subtype = MST_OTHER;
558	}
559
560	else if (!strncasecmp (header->data, "application", 11))
561	{
562	block->media_type = MT_APPLICATION;
563	if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15))
564	block->media_subtype = MST_DSPAM_SIGNATURE;
565	else
566	block->media_subtype = MST_OTHER;
567	}
568
569	else if (!strncasecmp (header->data, "message", 7))
570	{
571	block->media_type = MT_MESSAGE;
572	if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6))
573	block->media_subtype = MST_RFC822;
574	else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11))
575	block->media_subtype = MST_INOCULATION;
576	else
577	block->media_subtype = MST_OTHER;
578	}
579
580	else if (!strncasecmp (header->data, "multipart", 9))
581	{
582	char boundary[128];
583
584	block->media_type = MT_MULTIPART;
585	if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5))
586	block->media_subtype = MST_MIXED;
587	else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11))
588	block->media_subtype = MST_ALTERNATIVE;
589	else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6))
590	block->media_subtype = MST_SIGNED;
591	else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9))
592	block->media_subtype = MST_ENCRYPTED;
593	else
594	block->media_subtype = MST_OTHER;
595
596	if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) {
597	if (!_ds_match_boundary (boundaries, boundary)) {
598	_ds_push_boundary (boundaries, boundary);
599	free(block->boundary);
600	block->boundary = strdup (boundary);
601	}
602	} else {
603	_ds_push_boundary (boundaries, "");
604	}
605	}
606	else {
607	block->media_type = MT_OTHER;
608	block->media_subtype = MST_OTHER;
609	}
610
611	}
612
613	/* Content-Transfer-Encoding */
614
615	else if (!strcasecmp (header->heading, "Content-Transfer-Encoding"))
616	{
617	if (!strncasecmp (header->data, "7bit", 4))
618	block->encoding = EN_7BIT;
619	else if (!strncasecmp (header->data, "8bit", 4))
620	block->encoding = EN_8BIT;
621	else if (!strncasecmp (header->data, "quoted-printable", 16))
622	block->encoding = EN_QUOTED_PRINTABLE;
623	else if (!strncasecmp (header->data, "base64", 6))
624	block->encoding = EN_BASE64;
625	else if (!strncasecmp (header->data, "binary", 6))
626	block->encoding = EN_BINARY;
627	else
628	block->encoding = EN_OTHER;
629	}
630
631	if (!strcasecmp (header->heading, "Content-Disposition"))
632	{
633	if (!strncasecmp (header->data, "inline", 6))
634	block->content_disposition = PCD_INLINE;
635	else if (!strncasecmp (header->data, "attachment", 10))
636	block->content_disposition = PCD_ATTACHMENT;
637	else
638	block->content_disposition = PCD_OTHER;
639	}
640
641	return;
642	}
643
644	/*
645	* _ds_destroy_message (ds_message_t message)
646	*
647	* DESCRIPTION
648	* destroys a message structure (ds_message_t)
649	*
650	* INPUT ARGUMENTS
651	* message the message structure to be destroyed
652	*/
653
654	void
655	_ds_destroy_message (ds_message_t message)
656	{
657	struct nt_node *node_nt;
658	struct nt_c c;
659
660	if (message == NULL)
661	return;
662
663	if (message->components) {
664	node_nt = c_nt_first (message->components, &c);
665	while (node_nt != NULL)
666	{
667	ds_message_part_t block = (ds_message_part_t) node_nt->ptr;
668	_ds_destroy_block(block);
669	node_nt = c_nt_next (message->components, &c);
670	}
671	nt_destroy (message->components);
672	}
673	free (message);
674	return;
675	}
676
/*
 * _ds_destroy_headers (ds_message_part_t block)
 *
 * DESCRIPTION
 *   destroys a message block's header pairs
 *   does not free the structures themselves; these are freed at nt_destroy
 *
 * INPUT ARGUMENTS
 *   block   the message block containing the headers to destroy
 */
687
688	void
689	_ds_destroy_headers (ds_message_part_t block)
690	{
691	struct nt_node *node_nt;
692	struct nt_c c;
693
694	if (!block \|\| !block->headers)
695	return;
696
697	node_nt = c_nt_first (block->headers, &c);
698	while (node_nt != NULL)
699	{
700	ds_header_t field = (ds_header_t) node_nt->ptr;
701
702	if (field)
703	{
704	free (field->original_data);
705	free (field->heading);
706	free (field->concatenated_data);
707	free (field->data);
708	}
709	node_nt = c_nt_next (block->headers, &c);
710	}
711
712	return;
713	}
714
715	/*
716	* _ds_destroy_block (ds_message_part_t block)
717	*
718	* DESCRIPTION
719	* destroys a message block
720	*
721	* INPUT ARGUMENTS
722	* block the message block to destroy
723	*/
724
725	void
726	_ds_destroy_block (ds_message_part_t block)
727	{
728	if (!block)
729	return;
730
731	if (block->headers)
732	{
733	_ds_destroy_headers (block);
734	nt_destroy (block->headers);
735	}
736	buffer_destroy (block->body);
737	buffer_destroy (block->original_signed_body);
738	free (block->boundary);
739	free (block->terminating_boundary);
740	// free (block);
741	return;
742	}
743
744	/*
745	* _ds_decode_block (ds_message_part_t block)
746	*
747	* DESCRIPTION
748	* decodes a message block
749	*
750	* INPUT ARGUMENTS
751	* block the message block to decode
752	*
753	* RETURN VALUES
754	* a pointer to the allocated character array containing the decoded message
755	* NULL on failure
756	*/
757
758	char *
759	_ds_decode_block (ds_message_part_t block)
760	{
761	if (block->encoding == EN_BASE64)
762	return _ds_decode_base64 (block->body->data);
763	else if (block->encoding == EN_QUOTED_PRINTABLE)
764	return _ds_decode_quoted (block->body->data);
765
766	LOG (LOG_WARNING, "decoding of block encoding type %d not supported",
767	block->encoding);
768	return NULL;
769	}
770
771	/*
772	* _ds_decode_{base64,quoted,hex8bit}
773	*
774	* DESCRIPTION
775	* supporting block decoder functions
776	* these function call (or perform) specific decoding functions
777	*
778	* INPUT ARGUMENTS
779	* body encoded message body
780	*
781	* RETURN VALUES
782	* a pointer to the allocated character array containing the decoded body
783	*/
784
/*
 * Base64-decode `body` by delegating to base64decode().
 * Returns NULL for a NULL input, otherwise whatever base64decode() returns.
 */
char *
_ds_decode_base64 (const char *body)
{
  return (body != NULL) ? base64decode (body) : NULL;
}
793
794	char *
795	_ds_decode_quoted (const char *body)
796	{
797	#ifdef VERBOSE
798	LOGDEBUG("decoding Quoted Printable encoded buffer");
799	#endif
800	if (!body)
801	return NULL;
802
803	char n, out;
804	const char end, p;
805
806	n = out = malloc(strlen(body)+1);
807	end = body + strlen(body);
808
809	if (out == NULL) {
810	LOG (LOG_CRIT, ERR_MEM_ALLOC);
811	return NULL;
812	}
813
814	for (p = body; p < end; p++, n++) {
815	if (*p == '=') {
816	if (p[1] == '\r' && p[2] == '\n') {
817	n -= 1;
818	p += 2;
819	} else if (p[1] == '\n') {
820	n -= 1;
821	p += 1;
822	} else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
823	*n = ((_ds_hex2dec((unsigned char) p[1])) << 4) \| (_ds_hex2dec((unsigned char) p[2]));
824	p += 2;
825	} else
826	n = p;
827	} else
828	n = p;
829	}
830
831	*n = '\0';
832	return (char *)out;
833	}
834
835	char *
836	_ds_decode_hex8bit (const char *body)
837	{
838	#ifdef VERBOSE
839	LOGDEBUG("decoding hexadecimal 8-bit encodings in message block");
840	#endif
841	if (!body)
842	return NULL;
843
844	char n, out;
845	const char end, p;
846
847	n = out = malloc(strlen(body)+1);
848	end = body + strlen(body);
849
850	if (out == NULL) {
851	LOG (LOG_CRIT, ERR_MEM_ALLOC);
852	return NULL;
853	}
854
855	for (p = body; p < end; p++, n++) {
856	if (*p == '%')
857	if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
858	*n = ((_ds_hex2dec((unsigned char) p[1])) << 4) \| (_ds_hex2dec((unsigned char) p[2]));
859	p += 2;
860	} else
861	n = p;
862	else
863	n = p;
864	}
865
866	*n = '\0';
867	return (char *)out;
868	}
869
870	/*
871	* _ds_encode_block (ds_message_part_t block, int encoding)
872	*
873	* DESCRIPTION
874	* encodes a message block using the encoding specified and replaces the
875	* block's message body with the encoded data
876	*
877	* INPUT ARGUMENTS
878	* block the message block to encode
879	* encoding encoding to use (EN_)
880	*
881	* RETURN VALUES
882	* returns 0 on success
883	*/
884
885	int
886	_ds_encode_block (ds_message_part_t block, int encoding)
887	{
888	/* we can't encode a block with the same encoding */
889
890	if (block->encoding == encoding)
891	return EINVAL;
892
893	/* we can't encode a block that's already encoded */
894
895	if (block->encoding == EN_BASE64 \|\| block->encoding == EN_QUOTED_PRINTABLE)
896	return EFAILURE;
897
898	if (encoding == EN_BASE64) {
899	char *encoded = _ds_encode_base64 (block->body->data);
900	buffer_destroy (block->body);
901	block->body = buffer_create (encoded);
902	free (encoded);
903	block->encoding = EN_BASE64;
904	}
905	else if (encoding == EN_QUOTED_PRINTABLE) {
906
907	/* TODO */
908
909	return 0;
910	}
911
912	LOGDEBUG("unsupported encoding: %d", encoding);
913	return 0;
914	}
915
916	/*
917	* _ds_encode_{base64,quoted}
918	*
919	* DESCRIPTION
920	* supporting block encoder functions
921	* these function call (or perform) specific encoding functions
922	*
923	* INPUT ARGUMENTS
924	* body decoded message body
925	*
926	* RETURN VALUES
927	* a pointer to the allocated character array containing the encoded body
928	*/
929
/*
 * Base64-encode `body` by delegating to base64encode(); the result is
 * returned to the caller unchanged.
 */
char *
_ds_encode_base64 (const char *body)
{
  char *encoded = base64encode (body);

  return encoded;
}
935
/*
 * _ds_assemble_message (ds_message_t message, const char *newline)
 *
 * DESCRIPTION
 *   assembles a message structure into a flat text message
 *
 * INPUT ARGUMENTS
 *   message   the message structure (ds_message_t) to assemble
 *   newline   the line terminator appended after each header and between
 *             components (e.g. "\n" or "\r\n")
 *
 * RETURN VALUES
 *   a pointer to the allocated character array containing the text message
 */
948
949	char *
950	_ds_assemble_message (ds_message_t message, const char *newline)
951	{
952	buffer *out = buffer_create (NULL);
953	struct nt_node node_nt, node_header;
954	struct nt_c c_nt, c_nt2;
955	char *heading;
956	char *copyback;
957	#ifdef VERBOSE
958	int i = 0;
959	#endif
960
961	if (!out) {
962	LOG (LOG_CRIT, ERR_MEM_ALLOC);
963	return NULL;
964	}
965
966	node_nt = c_nt_first (message->components, &c_nt);
967	while (node_nt != NULL && node_nt->ptr != NULL)
968	{
969	ds_message_part_t block =
970	(ds_message_part_t) node_nt->ptr;
971	#ifdef VERBOSE
972	LOGDEBUG ("assembling component %d", i);
973	#endif
974
975	/* Assemble headers */
976
977	if (block->headers != NULL && block->headers->items > 0)
978	{
979	node_header = c_nt_first (block->headers, &c_nt2);
980	while (node_header != NULL)
981	{
982	char *data;
983	ds_header_t current_header =
984	(ds_header_t) node_header->ptr;
985
986	data = (current_header->original_data == NULL) ? current_header->data :
987	current_header->original_data;
988
989	heading = malloc(
990	((current_header->heading) ? strlen(current_header->heading) : 0)
991	+ ((data) ? strlen(data) : 0)
992	+ 3 + strlen(newline));
993
994	if (current_header->heading != NULL &&
995	(!strncmp (current_header->heading, "From ", 5) \|\|
996	!strncmp (current_header->heading, "--", 2)))
997	sprintf (heading, "%s:%s%s",
998	(current_header->heading) ? current_header->heading : "",
999	(data) ? data : "", newline);
1000	else
1001	sprintf (heading, "%s: %s%s",
1002	(current_header->heading) ? current_header->heading : "",
1003	(data) ? data : "", newline);
1004
1005	buffer_cat (out, heading);
1006	free(heading);
1007	node_header = c_nt_next (block->headers, &c_nt2);
1008	}
1009	}
1010
1011	buffer_cat (out, newline);
1012
1013	/* Assemble bodies */
1014
1015	if (block->original_signed_body != NULL && message->protect)
1016	buffer_cat (out, block->original_signed_body->data);
1017	else
1018	buffer_cat (out, block->body->data);
1019
1020	if (block->terminating_boundary != NULL)
1021	{
1022	buffer_cat (out, "--");
1023	buffer_cat (out, block->terminating_boundary);
1024	}
1025
1026	node_nt = c_nt_next (message->components, &c_nt);
1027	#ifdef VERBOSE
1028	i++;
1029	#endif
1030
1031	if (node_nt != NULL && node_nt->ptr != NULL)
1032	buffer_cat (out, newline);
1033	}
1034
1035	copyback = out->data;
1036	out->data = NULL;
1037	buffer_destroy (out);
1038	return copyback;
1039	}
1040
1041	/*
1042	* _ds_{push,pop,match,extract}_boundary
1043	*
1044	* DESCRIPTION
1045	* these functions maintain and service a boundary "stack" on the message
1046	*/
1047
1048	int
1049	_ds_push_boundary (struct nt stack, const char boundary)
1050	{
1051	char *y;
1052
1053	if (boundary == NULL \|\| boundary[0] == 0)
1054	return EINVAL;
1055
1056	y = malloc (strlen (boundary) + 3);
1057	if (y == NULL)
1058	return EUNKNOWN;
1059
1060	sprintf (y, "--%s", boundary);
1061	nt_add (stack, (char *) y);
1062	free(y);
1063
1064	return 0;
1065	}
1066
1067	char *
1068	_ds_pop_boundary (struct nt *stack)
1069	{
1070	struct nt_node node, last_node = NULL, *parent_node = NULL;
1071	struct nt_c c;
1072	char *boundary = NULL;
1073
1074	node = c_nt_first (stack, &c);
1075	while (node != NULL)
1076	{
1077	parent_node = last_node;
1078	last_node = node;
1079	node = c_nt_next (stack, &c);
1080	}
1081	if (parent_node != NULL)
1082	parent_node->next = NULL;
1083	else
1084	stack->first = NULL;
1085
1086	if (last_node == NULL)
1087	return NULL;
1088
1089	boundary = strdup (last_node->ptr);
1090
1091	free (last_node->ptr);
1092	free (last_node);
1093
1094	return boundary;
1095	}
1096
1097	int
1098	_ds_match_boundary (struct nt stack, const char buff)
1099	{
1100	struct nt_node *node;
1101	struct nt_c c;
1102
1103	node = c_nt_first (stack, &c);
1104	while (node != NULL)
1105	{
1106	if (!strncmp (buff, node->ptr, strlen (node->ptr)))
1107	{
1108	return 1;
1109	}
1110	node = c_nt_next (stack, &c);
1111	}
1112	return 0;
1113	}
1114
1115	int
1116	_ds_extract_boundary (char buf, size_t size, char mem)
1117	{
1118	char data, ptr, *ptrptr;
1119
1120	if (mem == NULL)
1121	return EINVAL;
1122
1123	data = strdup(mem);
1124	if (data == NULL) {
1125	LOG(LOG_CRIT, ERR_MEM_ALLOC);
1126	return EUNKNOWN;
1127	}
1128
1129	for(ptr=data;ptr<(data+strlen(data));ptr++) {
1130	if (!strncasecmp(ptr, "boundary", 8)) {
1131	ptr = strchr(ptr, '=');
1132	if (ptr == NULL) {
1133	free(data);
1134	return EFAILURE;
1135	}
1136	ptr++;
1137	while(isspace((int) ptr[0]))
1138	ptr++;
1139	if (ptr[0] == '"')
1140	ptr++;
1141	strtok_r(ptr, " \";\n\t", &ptrptr);
1142	strlcpy(buf, ptr, size);
1143	free(data);
1144	return 0;
1145	}
1146	}
1147
1148	free(data);
1149	return EFAILURE;
1150	}
1151
/*
 * _ds_find_header (ds_message_t message, const char *heading)
 *
 * DESCRIPTION
 *   finds a header and returns its value
 *
 * INPUT ARGUMENTS
 *   message   the message structure to search
 *   heading   the heading to search for
 *   (an earlier revision of this comment also listed an optional "flags"
 *   argument; the current signature does not take one)
 *
 * RETURN VALUES
 *   a pointer to the header structure's value
 *
 */
1167
1168	char *
1169	_ds_find_header (ds_message_t message, const char *heading) {
1170	ds_message_part_t block;
1171	ds_header_t head;
1172	struct nt_node *node_nt;
1173
1174	if (message->components->first) {
1175	if ((block = message->components->first->ptr)==NULL)
1176	return NULL;
1177	if (block->headers == NULL)
1178	return NULL;
1179	} else {
1180	return NULL;
1181	}
1182
1183	node_nt = block->headers->first;
1184	while(node_nt != NULL) {
1185	head = (ds_header_t) node_nt->ptr;
1186	if (head && !strcasecmp(head->heading, heading)) {
1187	return head->data;
1188	}
1189	node_nt = node_nt->next;
1190	}
1191
1192	return NULL;
1193	}
1194
/*
 * Convert one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to
 * its numeric value 0..15. Returns -1 for any other character.
 */
int _ds_hex2dec(unsigned char hex) {
  /* The C standard guarantees '0'..'9' are contiguous; letters are not
   * guaranteed to be, so they are listed explicitly. */
  if (hex >= '0' && hex <= '9')
    return hex - '0';

  switch (hex) {
    case 'a': case 'A': return 10;
    case 'b': case 'B': return 11;
    case 'c': case 'C': return 12;
    case 'd': case 'D': return 13;
    case 'e': case 'E': return 14;
    case 'f': case 'F': return 15;
    default:            return -1;
  }
}
1216
1217	/*
1218	* _ds_strip_html(const char *html)
1219	*
1220	* DESCRIPTION
1221	* strip html tags from the supplied message
1222	*
1223	* INPUT ARGUMENTS
1224	* html encoded message body
1225	*
1226	* RETURN VALUES
1227	* a pointer to the allocated character array containing the
1228	* stripped message
1229	*
1230	*/
1231
1232	char *
1233	_ds_strip_html (const char *html)
1234	{
1235	#ifdef VERBOSE
1236	LOGDEBUG("stripping HTML tags from message block");
1237	#endif
1238	size_t j = 0, k = 0, i = 0;
1239	int visible = 1;
1240	int closing_td_tag = 0;
1241	char *html2;
1242	const char *cdata_close_tag = NULL;
1243
1244	if(!html)
1245	return NULL;
1246
1247	static struct {
1248	unsigned int id;
1249	char *entity;
1250	}
1251	charset[] = {
1252	{ 32, " " }, { 34, """ }, { 34, """ }, { 38, "&" },
1253	{ 38, "&" }, { 39, "'" }, { 60, "<" }, { 60, "<" },
1254	{ 62, ">" }, { 62, ">" }, { 160, " " }, { 161, "¡" },
1255	{ 162, "¢" }, { 163, "£" }, { 164, "¤" }, { 165, "¥" },
1256	{ 166, "¦" }, { 167, "§" }, { 168, "¨" }, { 169, "©" },
1257	{ 170, "ª" }, { 171, "«" }, { 172, "¬" }, { 173, "" },
1258	{ 174, "®" }, { 175, "¯" }, { 176, "°" }, { 177, "±" },
1259	{ 178, "²" }, { 179, "³" }, { 180, "´" }, { 181, "µ" },
1260	{ 182, "¶" }, { 183, "·" }, { 184, "¸" }, { 185, "¹" },
1261	{ 186, "º" }, { 187, "»" }, { 188, "¼" }, { 189, "½" },
1262	{ 190, "¾" }, { 191, "¿" }, { 192, "À" }, { 193, "Á" },
1263	{ 194, "Â" }, { 195, "Ã" }, { 196, "Ä" }, { 197, "Å" },
1264	{ 198, "Æ" }, { 199, "Ç" }, { 200, "È" }, { 201, "É" },
1265	{ 202, "Ê" }, { 203, "Ë" }, { 204, "Ì" }, { 205, "Í" },
1266	{ 206, "Î" }, { 207, "Ï" }, { 208, "Ð" }, { 209, "Ñ" },
1267	{ 210, "Ò" }, { 211, "Ó" }, { 212, "Ô" }, { 213, "Õ" },
1268	{ 214, "Ö" }, { 215, "×" }, { 216, "Ø" }, { 217, "Ù" },
1269	{ 218, "Ú" }, { 219, "Û" }, { 220, "Ü" }, { 221, "Ý" },
1270	{ 222, "Þ" }, { 223, "ß" }, { 224, "à" }, { 225, "á" },
1271	{ 226, "â" }, { 227, "ã" }, { 228, "ä" }, { 229, "å" },
1272	{ 230, "æ" }, { 231, "ç" }, { 232, "è" }, { 233, "é" },
1273	{ 234, "ê" }, { 235, "ë" }, { 236, "ì" }, { 237, "í" },
1274	{ 238, "î" }, { 239, "ï" }, { 240, "ð" }, { 241, "ñ" },
1275	{ 242, "ò" }, { 243, "ó" }, { 244, "ô" }, { 245, "õ" },
1276	{ 246, "ö" }, { 247, "÷" }, { 248, "ø" }, { 249, "ù" },
1277	{ 250, "ú" }, { 251, "û" }, { 252, "ü" }, { 253, "ý" },
1278	{ 254, "þ" }, { 255, "ÿ" }, { 338, "&OElig;" }, { 339, "&oelig;" },
1279	{ 352, "&Scaron;" }, { 353, "&scaron;" }, { 376, "&Yuml;" }, { 402, "&fnof;" },
1280	{ 710, "&circ;" }, { 732, "&tilde;" }, { 913, "Α" }, { 914, "Β" },
1281	{ 915, "Γ" }, { 916, "Δ" }, { 917, "Ε" }, { 918, "Ζ" },
1282	{ 919, "Η" }, { 920, "Θ" }, { 921, "Ι" }, { 922, "Κ" },
1283	{ 923, "Λ" }, { 924, "Μ" }, { 925, "Ν" }, { 926, "Ξ" },
1284	{ 927, "Ο" }, { 928, "Π" }, { 929, "Ρ" }, { 931, "Σ" },
1285	{ 932, "Τ" }, { 933, "Υ" }, { 934, "Φ" }, { 935, "Χ" },
1286	{ 936, "Ψ" }, { 937, "Ω" }, { 945, "α" }, { 946, "β" },
1287	{ 947, "γ" }, { 948, "δ" }, { 949, "ε" }, { 950, "ζ" },
1288	{ 951, "η" }, { 952, "θ" }, { 953, "ι" }, { 954, "κ" },
1289	{ 955, "λ" }, { 956, "μ" }, { 957, "ν" }, { 958, "ξ" },
1290	{ 959, "ο" }, { 960, "π" }, { 961, "ρ" }, { 962, "&sigmaf;" },
1291	{ 963, "σ" }, { 964, "τ" }, { 965, "υ" }, { 966, "φ" },
1292	{ 967, "χ" }, { 968, "ψ" }, { 969, "ω" }, { 977, "&thetasym" },
1293	{ 978, "&upsih;" }, { 982, "ϖ" }, {8194, "&ensp;" }, {8195, "&emsp;" },
1294	{ 8201, " " }, {8204, "&zwnj;" }, {8205, "&zwj;" }, {8206, "&lrm;" },
1295	{ 8207, "&rlm;" }, {8211, "–" }, {8212, "—" }, {8216, "‘" },
1296	{ 8217, "’" }, {8218, "&sbquo;" }, {8220, "“" }, {8221, "”" },
1297	{ 8222, "&bdquo;" }, {8224, "&dagger;" }, {8225, "&Dagger;" }, {8226, "•" },
1298	{ 8230, "…" }, {8240, "&permil;" }, {8242, "′" }, {8243, "″" },
1299	{ 8249, "&lsaquo;" }, {8250, "&rsaquo;" }, {8254, "&oline;" }, {8260, "&frasl;" },
1300	{ 8364, "€" }, {8465, "&image;" }, {8472, "&weierp;" }, {8476, "&real;" },
1301	{ 8482, "™" }, {8501, "&alefsym;" }, {8592, "←" }, {8593, "↑" },
1302	{ 8594, "→" }, {8595, "↓" }, {8596, "↔" }, {8629, "&crarr;" },
1303	{ 8656, "⇐" }, {8657, "&uArr;" }, {8658, "⇒" }, {8659, "&dArr;" },
1304	{ 8660, "⇔" }, {8704, "∀" }, {8706, "∂" }, {8707, "∃" },
1305	{ 8709, "∅" }, {8711, "∇" }, {8712, "∈" }, {8713, "∉" },
1306	{ 8715, "&ni;" }, {8719, "∏" }, {8721, "∑" }, {8722, "−" },
1307	{ 8727, "&lowast;" }, {8730, "√" }, {8733, "&prop;" }, {8734, "∞" },
1308	{ 8736, "&ang;" }, {8743, "&and;" }, {8744, "&or;" }, {8745, "∩" },
1309	{ 8746, "∪" }, {8747, "∫" }, {8756, "&there4;" }, {8764, "&sim;" },
1310	{ 8773, "&cong;" }, {8776, "≈" }, {8800, "≠" }, {8801, "&equiv;" },
1311	{ 8804, "≤" }, {8805, "≥" }, {8834, "⊂" }, {8835, "⊃" },
1312	{ 8836, "&nsub;" }, {8838, "&sube;" }, {8839, "&supe;" }, {8853, "&oplus;" },
1313	{ 8855, "&otimes;" }, {8869, "&perp;" }, {8901, "⋅" }, {8968, "&lceil;" },
1314	{ 8969, "&rceil;" }, {8970, "&lfloor;" }, {8971, "&rfloor;" }, {9001, "&lang;" },
1315	{ 9002, "&rang;" }, {9674, "&loz;" }, {9824, "&spades;" }, {9827, "&clubs;" },
1316	{ 9829, "&hearts;" }, {9830, "&diams;" }
1317	};
1318	int num_chars = sizeof(charset) / sizeof(charset[0]);
1319
1320	static struct {
1321	char *open_tag;
1322	char *uri_tag;
1323	}
1324	uritag[] = {
1325	{ "<a", "href" }, { "<img", "src" }, { "<input", "src" },
1326	{ "<iframe", "src" }, { "<frame", "src" }, { "<script", "src" },
1327	{ "<form", "action" }, { "<embed", "src" }, { "<area", "href" },
1328	{ "<base", "href" }, { "<link", "href" }, { "<source", "src" },
1329	{ "<body", "background" }, { "<blockquote", "cite" }, { "<q", "cite" },
1330	{ "<ins", "cite" }, { "<del", "cite" }
1331	};
1332	int num_uri = sizeof(uritag) / sizeof(uritag[0]);
1333
1334	size_t len = strlen(html);
1335	html2 = malloc(len+1);
1336
1337	if (html2 == NULL) {
1338	LOG (LOG_CRIT, ERR_MEM_ALLOC);
1339	return NULL;
1340	}
1341
1342	for (i = 0; i < len; i++) {
1343	if (html[i] == '<') {
1344	if (cdata_close_tag) {
1345	if (strncasecmp(html + i, cdata_close_tag, strlen(cdata_close_tag)) == 0) {
1346	i += strlen(cdata_close_tag) - 1;
1347	cdata_close_tag = NULL;
1348	}
1349	continue;
1350	} else if (strncasecmp(html + i, "</td>", 5) == 0) {
1351	i += 4;
1352	closing_td_tag = 1;
1353	continue;
1354	} else if (strncasecmp(html + i, "<td", 3) == 0 && closing_td_tag) {
1355	if (j > 0 && !isspace(html2[j-1])) {
1356	html2[j++]=' ';
1357	}
1358	visible = 0;
1359	} else {
1360	closing_td_tag = 0;
1361	visible = 1;
1362	}
1363	k = i + 1;
1364
1365	if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) \|\|
1366	(html[k] >= 97 && html[k] <= 122) \|\|
1367	(html[k] == 47) \|\|
1368	(html[k] == 33) ))) {
1369	/* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */
1370	visible = 1;
1371	html2[j++]=html[i];
1372	i = k;
1373	const char *w = &(html[k]);
1374	while (j < len && (size_t)(w - html) < len && *w != '<') {
1375	html2[j++]=*w;
1376	w++;
1377	i++;
1378	}
1379	continue;
1380	} else if (html[k]) {
1381	/* find the end of the tag */
1382	while (k < len && html[k] != '<' && html[k] != '>') {k++;}
1383
1384	/* if we've got a tag with a uri, save the address to print later. */
1385	char *url_tag = " ";
1386	int tag_offset = 0, x = 0, y = 0;
1387	for (y = 0; y < num_uri; y++) {
1388	x = strlen(uritag[y].open_tag);
1389	if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) {
1390	url_tag = uritag[y].uri_tag;
1391	tag_offset = i + x + 1;
1392	break;
1393	}
1394	}
1395	/* tag with uri found */
1396	if (tag_offset > 0) {
1397	size_t url_start; /* start of url tag inclusive [ */
1398	size_t url_tag_len = strlen(url_tag);
1399	char delim = ' ';
1400	/* find start of uri */
1401	for (url_start = tag_offset; url_start <= k; url_start++) {
1402	if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) {
1403	url_start += url_tag_len;
1404	while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces before = */
1405	if (html[url_start] == '=') {
1406	url_start++;
1407	while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */
1408	if (html[url_start] == '"') {
1409	delim = '"';
1410	url_start++;
1411	} else if (html[url_start] == '\'') {
1412	delim = '\'';
1413	url_start++;
1414	} else {
1415	delim = '>';
1416	}
1417	break;
1418	} else {
1419	/* Start of uri tag found but no '=' after the tag.
1420	* Skip the whole tag.
1421	*/
1422	break;
1423	}
1424	} else if ((url_start - tag_offset) >= 50) {
1425	/* The length of the html tag is over 50 characters long without
1426	* finding the start of the url/uri. Skip the whole tag.
1427	*/
1428	break;
1429	}
1430	}
1431	/* find end of uri */
1432	if (delim != ' ') {
1433	if (url_start < len &&
1434	(strncasecmp(html + url_start, "http:", 5) == 0 \|\|
1435	strncasecmp(html + url_start, "https:", 6) == 0 \|\|
1436	strncasecmp(html + url_start, "ftp:", 4) == 0)) {
1437	html2[j++]=' ';
1438	const char *w = &(html[url_start]);
1439	/* html2 is a buffer of len + 1, where the +1 is for NULL
1440	* termination. This means we only want to loop to len
1441	* since we will replace html2[j] right after the loop.
1442	*/
1443	while (j < len && (size_t)(w - html) < len && *w != delim) {
1444	html2[j++]=*w;
1445	w++;
1446	}
1447	html2[j++]=' ';
1448	}
1449	}
1450	} else if (strncasecmp(html + i, "<p>", 3) == 0
1451	\|\| strncasecmp(html + i, "<p ", 3) == 0
1452	\|\| strncasecmp(html + i, "<p\t", 3) == 0
1453	\|\| strncasecmp(html + i, "<tr", 3) == 0
1454	\|\| strncasecmp(html + i, "<option", 7) == 0
1455	\|\| strncasecmp(html + i, "<br", 3) == 0
1456	\|\| strncasecmp(html + i, "<li", 3) == 0
1457	\|\| strncasecmp(html + i, "<div", 4) == 0
1458	\|\| strncasecmp(html + i, "</select>", 9) == 0
1459	\|\| strncasecmp(html + i, "</table>", 8) == 0) {
1460	if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') {
1461	html2[j++] = '\n';
1462	}
1463	} else if (strncasecmp(html + i, "<applet", 7) == 0) {
1464	cdata_close_tag = "</applet>";
1465	} else if (strncasecmp(html + i, "<embed", 6) == 0) {
1466	cdata_close_tag = "</embed>";
1467	} else if (strncasecmp(html + i, "<frameset", 9) == 0) {
1468	cdata_close_tag = "</frameset>";
1469	} else if (strncasecmp(html + i, "<frame", 6) == 0) {
1470	cdata_close_tag = "</frame>";
1471	} else if (strncasecmp(html + i, "<iframe", 7) == 0) {
1472	cdata_close_tag = "</iframe>";
1473	} else if (strncasecmp(html + i, "<noembed", 8) == 0) {
1474	cdata_close_tag = "</noembed>";
1475	} else if (strncasecmp(html + i, "<noscript", 9) == 0) {
1476	cdata_close_tag = "</noscript>";
1477	} else if (strncasecmp(html + i, "<object", 7) == 0) {
1478	cdata_close_tag = "</object>";
1479	} else if (strncasecmp(html + i, "<script", 7) == 0) {
1480	cdata_close_tag = "</script>";
1481	} else if (strncasecmp(html + i, "<style", 6) == 0) {
1482	cdata_close_tag = "</style>";
1483	}
1484	i = (html[k] == '<' \|\| html[k] == '\0')? k - 1: k;
1485	continue;
1486	}
1487	} else if (cdata_close_tag) {
1488	continue;
1489	} else if (!isspace(html[i])) {
1490	visible = 1;
1491	}
1492
1493	if (strncmp(html+i,"&#",2)==0) {
1494	int x = 0;
1495	const char *w = &(html[i+2]);
1496	while (*w == '0') {i++;w++;}
1497	char n[5];
1498	if (html[i+4] && html[i+4] == ';'
1499	&& isdigit(html[i+2])
1500	&& isdigit(html[i+3])) {
1501	n[0] = html[i+2];
1502	n[1] = html[i+3];
1503	n[2] = 0;
1504	x = atoi(n);
1505	if (x <= 255 && x >= 32)
1506	html2[j++] = x;
1507	i += 4;
1508	} else if (html[i+6]
1509	&& html[i+6] == ';'
1510	&& isdigit(html[i+2])
1511	&& isdigit(html[i+3])
1512	&& isdigit(html[i+4])
1513	&& isdigit(html[i+5])) {
1514	n[0] = html[i+2];
1515	n[1] = html[i+3];
1516	n[2] = html[i+4];
1517	n[3] = html[i+5];
1518	n[4] = 0;
1519	x = atoi(n);
1520	if (x <= 255 && x >= 32)
1521	html2[j++] = x;
1522	i += 6;
1523	} else {
1524	const char *w = &(html[i]);
1525	while (w != ';' && w != ' ' && w != '\t' && w != '\0') {i++;w++;}
1526	}
1527	visible = 0;
1528	continue;
1529	} else if (html[i] == '&') {
1530	int x = 0, y = 0;
1531	for (y = 0; y < num_chars; y++) {
1532	x = strlen(charset[y].entity);
1533	if (strncasecmp(html+i,charset[y].entity,x)==0) {
1534	if (charset[y].id <= 255)
1535	html2[j++] = charset[y].id;
1536	i += x-1;
1537	visible = 0;
1538	continue;
1539	}
1540	}
1541	}
1542
1543	if (j < len && visible)
1544	html2[j++] = html[i];
1545
1546	if (j >= len)
1547	i = j = len;
1548	}
1549
1550	html2[j] = '\0';
1551	return (char *)html2;
1552	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: