Context Navigation

source: npl/mailserver/dspam/dspam-3.10.2/src/decode.c @ c5c522c

gcc484ntopperl-5.22

Last change on this file since c5c522c was c5c522c, checked in by Edwin Eefting <edwin@datux.nl>, 8 years ago
initial commit, transferred from cleaned syn3 svn tree
Property mode set to `100644`
File size: 43.1 KB

Rev	Line
[c5c522c]	1	/* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */
	2
	3	/*
	4	DSPAM
	5	COPYRIGHT (C) 2002-2012 DSPAM PROJECT
	6
	7	This program is free software: you can redistribute it and/or modify
	8	it under the terms of the GNU Affero General Public License as
	9	published by the Free Software Foundation, either version 3 of the
	10	License, or (at your option) any later version.
	11
	12	This program is distributed in the hope that it will be useful,
	13	but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	GNU Affero General Public License for more details.
	16
	17	You should have received a copy of the GNU Affero General Public License
	18	along with this program. If not, see <http://www.gnu.org/licenses/>.
	19
	20	*/
	21
	22	/*
	23	* decode.c - message decoding and parsing
	24	*
	25	* DESCRIPTION
	26	* This set of functions performs parsing and decoding of a message and
	27	* embeds its components into a ds_message_t structure, suitable for
	28	* logical access.
	29	*/
	30
	31	#ifdef HAVE_CONFIG_H
	32	#include <auto-config.h>
	33	#endif
	34
	35	#include <stdio.h>
	36	#include <string.h>
	37	#include <stdlib.h>
	38	#include <ctype.h>
	39
	40	#include "decode.h"
	41	#include "error.h"
	42	#include "util.h"
	43	#include "language.h"
	44	#include "buffer.h"
	45	#include "base64.h"
	46	#include "libdspam.h"
	47
	48	/*
	49	* _ds_actualize_message (const char *message)
	50	*
	51	* DESCRIPTION
	52	* primary message parser
	53	*
	54	* this function performs all decoding and actualization of the message
	55	* into the message structures defined in the .h
	56	*
	57	* INPUT ARGUMENTS
	58	* message message to decode
	59	*
	60	* RETURN VALUES
	61	* pointer to an allocated message structure (ds_message_t), NULL on failure
	62	*/
	63
	64	ds_message_t
	65	_ds_actualize_message (const char *message)
	66	{
	67	char *line = NULL;
	68	char *in = NULL;
	69	char *m_in = NULL;
	70	ds_message_part_t current_block;
	71	ds_header_t current_heading = NULL;
	72	struct nt *boundaries = NULL;
	73	ds_message_t out = NULL;
	74	int block_position = BP_HEADER;
	75	int in_content = 0;
	76
	77	if (!message \|\| !(*message))
	78	goto MEMFAIL;
	79
	80	if (!(in = strdup(message)))
	81	goto MEMFAIL;
	82
	83	m_in = in;
	84
	85	boundaries = nt_create (NT_CHAR);
	86	if (!boundaries)
	87	goto MEMFAIL;
	88
	89	out = (ds_message_t) calloc (1, sizeof (struct _ds_message));
	90	if (!out)
	91	goto MEMFAIL;
	92
	93	out->components = nt_create (NT_PTR);
	94	if (!out->components)
	95	goto MEMFAIL;
	96
	97	current_block = _ds_create_message_part ();
	98	if (!current_block)
	99	goto MEMFAIL;
	100
	101	if (nt_add (out->components, (void *) current_block) == NULL)
	102	goto MEMFAIL;
	103
	104	/* Read the message from memory */
	105
	106	line = strsep (&in, "\n");
	107	while (line)
	108	{
	109
	110	/* Header processing */
	111
	112	if (block_position == BP_HEADER)
	113	{
	114
	115	/* If we see two boundaries converged on top of one another */
	116
	117	if (_ds_match_boundary (boundaries, line))
	118	{
	119
	120	/* Add the boundary as the terminating boundary */
	121
	122	current_block->terminating_boundary = strdup (line + 2);
	123	current_block->original_encoding = current_block->encoding;
	124
	125	_ds_decode_headers(current_block);
	126	current_block = _ds_create_message_part ();
	127
	128	if (!current_block)
	129	goto MEMFAIL;
	130
	131	if (nt_add (out->components, (void *) current_block) == NULL)
	132	goto MEMFAIL;
	133
	134	block_position = BP_HEADER;
	135	}
	136
	137	/* Concatenate multiline headers to the original header field data */
	138
	139	else if (line[0] == 32 \|\| line[0] == '\t')
	140	{
	141	if (current_heading)
	142	{
	143	char eow, ptr;
	144
	145	ptr = realloc (current_heading->data,
	146	strlen (current_heading->data) + strlen (line) + 2);
	147	if (ptr)
	148	{
	149	current_heading->data = ptr;
	150	strcat (current_heading->data, "\n");
	151	strcat (current_heading->data, line);
	152	} else {
	153	goto MEMFAIL;
	154	}
	155
	156	/* Our concatenated data doesn't have any whitespace between lines */
	157	for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { }
	158
	159	ptr =
	160	realloc (current_heading->concatenated_data,
	161	strlen (current_heading->concatenated_data) + strlen (eow) + 1);
	162	if (ptr)
	163	{
	164	current_heading->concatenated_data = ptr;
	165	strcat (current_heading->concatenated_data, eow);
	166	} else {
	167	goto MEMFAIL;
	168	}
	169
	170	if (current_heading->original_data) {
	171	ptr =
	172	realloc (current_heading->original_data,
	173	strlen (current_heading->original_data) +
	174	strlen (line) + 2);
	175	if (ptr) {
	176	current_heading->original_data = ptr;
	177	strcat (current_heading->original_data, "\n");
	178	strcat (current_heading->original_data, line);
	179	} else {
	180	goto MEMFAIL;
	181	}
	182	}
	183
	184	_ds_analyze_header (current_block, current_heading, boundaries);
	185	}
	186	}
	187
	188	/* New header field when LF or CRLF is not found */
	189
	190	else if (line[0] != 0 && line[0] != 13)
	191	{
	192	ds_header_t header = _ds_create_header_field (line);
	193
	194	if (header != NULL)
	195	{
	196	_ds_analyze_header (current_block, header, boundaries);
	197	current_heading = header;
	198	nt_add (current_block->headers, header);
	199	}
	200
	201
	202	/* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */
	203
	204	} else {
	205	block_position = BP_BODY;
	206	}
	207	}
	208
	209	/* Body processing */
	210
	211	else if (block_position == BP_BODY)
	212	{
	213	/* Look for a boundary in the header of a part */
	214
	215	if (!strncasecmp (line, "Content-Type", 12)
	216	\|\| ((line[0] == 32 \|\| line[0] == 9) && in_content))
	217	{
	218	char boundary[128];
	219	in_content = 1;
	220	if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) {
	221	if (!_ds_match_boundary (boundaries, boundary)) {
	222	_ds_push_boundary (boundaries, boundary);
	223	free(current_block->boundary);
	224	current_block->boundary = strdup (boundary);
	225	}
	226	} else {
	227	_ds_push_boundary (boundaries, "");
	228	}
	229	} else {
	230	in_content = 0;
	231	}
	232
	233	/* Multipart boundary was reached; move onto next block */
	234
	235	if (_ds_match_boundary (boundaries, line))
	236	{
	237
	238	/* Add the boundary as the terminating boundary */
	239
	240	current_block->terminating_boundary = strdup (line + 2);
	241	current_block->original_encoding = current_block->encoding;
	242
	243	_ds_decode_headers(current_block);
	244	current_block = _ds_create_message_part ();
	245
	246	if (!current_block)
	247	goto MEMFAIL;
	248
	249	if (nt_add (out->components, (void *) current_block) == NULL)
	250	goto MEMFAIL;
	251
	252	block_position = BP_HEADER;
	253	}
	254
	255	/* Plain old message (or part) body */
	256
	257	else {
	258	buffer_cat (current_block->body, line);
	259
	260	/* Don't add extra \n at the end of message's body */
	261
	262	if (in != NULL)
	263	buffer_cat (current_block->body, "\n");
	264	}
	265	}
	266
	267	line = strsep (&in, "\n");
	268	} /* while (line) */
	269
	270	_ds_decode_headers(current_block);
	271
	272	free (m_in);
	273	nt_destroy (boundaries);
	274	return out;
	275
	276	MEMFAIL:
	277	if (m_in) free(m_in);
	278	if (boundaries) nt_destroy (boundaries);
	279	if (out) _ds_destroy_message(out);
	280	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	281	return NULL;
	282	}
	283
	284	/*
	285	* _ds_create_message_part
	286	*
	287	* DESCRIPTION
	288	* create and initialize a new message block component
	289	*
	290	* RETURN VALUES
	291	* pointer to an allocated message block (ds_message_part_t), NULL on failure
	292	*
	293	*/
	294
	295	ds_message_part_t
	296	_ds_create_message_part (void)
	297	{
	298	ds_message_part_t block =
	299	(ds_message_part_t) calloc (1, sizeof (struct _ds_message_part));
	300
	301	if (!block)
	302	goto MEMFAIL;
	303
	304	block->headers = nt_create (NT_PTR);
	305	if (!block->headers)
	306	goto MEMFAIL;
	307
	308	block->body = buffer_create (NULL);
	309	if (!block->body)
	310	goto MEMFAIL;
	311
	312	block->encoding = EN_UNKNOWN;
	313	block->media_type = MT_TEXT;
	314	block->media_subtype = MST_PLAIN;
	315	block->original_encoding = EN_UNKNOWN;
	316	block->content_disposition = PCD_UNKNOWN;
	317
	318	/* Not really necessary, but.. */
	319
	320	block->boundary = NULL;
	321	block->terminating_boundary = NULL;
	322	block->original_signed_body = NULL;
	323
	324
	325	return block;
	326
	327	MEMFAIL:
	328	if (block) {
	329	buffer_destroy(block->body);
	330	nt_destroy(block->headers);
	331	free(block);
	332	}
	333	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	334	return NULL;
	335	}
	336
	337	/*
	338	* _ds_create_header_field(const char *heading)
	339	*
	340	* DESCRIPTION
	341	* create and initialize a new header structure
	342	*
	343	* INPUT ARGUMENTS
	344	* heading plain text heading (e.g. "To: Mom")
	345	*
	346	* RETURN VALUES
	347	* pointer to an allocated header structure (ds_header_t), NULL on failure
	348	*/
	349
	350	ds_header_t
	351	_ds_create_header_field (const char *heading)
	352	{
	353	char *in = strdup(heading);
	354	char ptr, m = in, *data;
	355	ds_header_t header =
	356	(ds_header_t) calloc (1, sizeof (struct _ds_header_field));
	357
	358	if (!header \|\| !in)
	359	goto MEMFAIL;
	360
	361	ptr = strsep (&in, ":");
	362	if (ptr) {
	363	header->heading = strdup (ptr);
	364	if (!header->heading)
	365	goto MEMFAIL;
	366	else
	367	{
	368	if (!in)
	369	{
	370	LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't "
	371	"contains `:' character", __FILE__, __LINE__, header->heading);
	372
	373	/* Use empty string as data as fallback for comtinue processing. */
	374
	375	in = "";
	376	}
	377	else
	378	{
	379	/* Skip white space */
	380	while (in == 32 \|\| in == 9)
	381	++in;
	382	}
	383
	384	data = strdup (in);
	385	if (!data)
	386	goto MEMFAIL;
	387
	388	header->data = data;
	389	header->concatenated_data = strdup(data);
	390	}
	391	}
	392
	393	free (m);
	394	return header;
	395
	396	MEMFAIL:
	397	free(header);
	398	free(m);
	399	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	400	return NULL;
	401	}
	402
	403	/*
	404	* _ds_decode_headers (ds_message_part_t block)
	405	*
	406	* DESCRIPTION
	407	* decodes in-line encoded headers
	408	*
	409	* RETURN VALUES
	410	* returns 0 on success
	411	*/
	412
	413	int
	414	_ds_decode_headers (ds_message_part_t block) {
	415	#ifdef VERBOSE
	416	LOGDEBUG("decoding headers in message block");
	417	#endif
	418	char ptr, dptr, rest, enc;
	419	ds_header_t header;
	420	struct nt_node *node_nt;
	421	struct nt_c c_nt;
	422	long decoded_len;
	423
	424	node_nt = c_nt_first(block->headers, &c_nt);
	425	while(node_nt != NULL) {
	426	long enc_offset;
	427	header = (ds_header_t) node_nt->ptr;
	428
	429	for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++)
	430	{
	431	enc = header->concatenated_data + enc_offset;
	432
	433	if (!strncmp(enc, "=?", 2)) {
	434	int was_null = 0;
	435	char ptrptr, decoded = NULL;
	436	long offset = (long) enc - (long) header->concatenated_data;
	437
	438	if (header->original_data == NULL) {
	439	header->original_data = strdup(header->data);
	440	was_null = 1;
	441	}
	442
	443	strtok_r (enc, "?", &ptrptr);
	444	strtok_r (NULL, "?", &ptrptr);
	445	ptr = strtok_r (NULL, "?", &ptrptr);
	446	dptr = strtok_r (NULL, "?", &ptrptr);
	447	if (!dptr) {
	448	if (was_null && header->original_data != NULL)
	449	free(header->original_data);
	450	if (was_null)
	451	header->original_data = NULL;
	452	continue;
	453	}
	454
	455	rest = dptr + strlen (dptr);
	456	if (rest[0]!=0) {
	457	rest++;
	458	if (rest[0]!=0) rest++;
	459	}
	460
	461	if (ptr != NULL && (ptr[0] == 'b' \|\| ptr[0] == 'B'))
	462	decoded = _ds_decode_base64 (dptr);
	463	else if (ptr != NULL && (ptr[0] == 'q' \|\| ptr[0] == 'Q'))
	464	decoded = _ds_decode_quoted (dptr);
	465
	466	decoded_len = 0;
	467
	468	/* Append the rest of the message */
	469
	470	if (decoded)
	471	{
	472	char *new_alloc;
	473
	474	decoded_len = strlen(decoded);
	475	new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2);
	476	if (new_alloc == NULL) {
	477	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	478	}
	479	else
	480	{
	481	if (offset)
	482	strncpy(new_alloc, header->concatenated_data, offset);
	483
	484	strcat(new_alloc, decoded);
	485	strcat(new_alloc, rest);
	486	free(decoded);
	487	decoded = new_alloc;
	488	}
	489	}
	490
	491	if (decoded) {
	492	enc_offset += (decoded_len-1);
	493	free(header->concatenated_data);
	494	header->concatenated_data = decoded;
	495	}
	496	else if (was_null && header->original_data) {
	497	free(header->original_data);
	498	header->original_data = NULL;
	499	}
	500	else if (was_null) {
	501	header->original_data = NULL;
	502	}
	503	}
	504	}
	505
	506	if (header->original_data != NULL) {
	507	free(header->data);
	508	header->data = strdup(header->concatenated_data);
	509	}
	510
	511	node_nt = c_nt_next(block->headers, &c_nt);
	512	}
	513
	514	return 0;
	515	}
	516
	517	/*
	518	* _ds_analyze_header (ds_message_part_t block, ds_header_t header,
	519	* struct nt *boundaries)
	520	*
	521	* DESCRIPTION
	522	* analyzes the header passed in and performs various operations including:
	523	* - setting media type and subtype
	524	* - setting transfer encoding
	525	* - adding newly discovered boundaries
	526	*
	527	* based on the heading specified. essentially all headers should be
	528	* analyzed for future expansion
	529	*
	530	* INPUT ARGUMENTS
	531	* block the message block to which the header belongs
	532	* header the header to analyze
	533	* boundaries a list of known boundaries found within the block
	534	*/
	535
	536	void
	537	_ds_analyze_header (
	538	ds_message_part_t block,
	539	ds_header_t header,
	540	struct nt *boundaries)
	541	{
	542	if (!header \|\| !block \|\| !header->data)
	543	return;
	544
	545	/* Content-Type header */
	546
	547	if (!strcasecmp (header->heading, "Content-Type"))
	548	{
	549	int len = strlen(header->data);
	550	if (!strncasecmp (header->data, "text", 4)) {
	551	block->media_type = MT_TEXT;
	552	if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5))
	553	block->media_subtype = MST_PLAIN;
	554	else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4))
	555	block->media_subtype = MST_HTML;
	556	else
	557	block->media_subtype = MST_OTHER;
	558	}
	559
	560	else if (!strncasecmp (header->data, "application", 11))
	561	{
	562	block->media_type = MT_APPLICATION;
	563	if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15))
	564	block->media_subtype = MST_DSPAM_SIGNATURE;
	565	else
	566	block->media_subtype = MST_OTHER;
	567	}
	568
	569	else if (!strncasecmp (header->data, "message", 7))
	570	{
	571	block->media_type = MT_MESSAGE;
	572	if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6))
	573	block->media_subtype = MST_RFC822;
	574	else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11))
	575	block->media_subtype = MST_INOCULATION;
	576	else
	577	block->media_subtype = MST_OTHER;
	578	}
	579
	580	else if (!strncasecmp (header->data, "multipart", 9))
	581	{
	582	char boundary[128];
	583
	584	block->media_type = MT_MULTIPART;
	585	if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5))
	586	block->media_subtype = MST_MIXED;
	587	else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11))
	588	block->media_subtype = MST_ALTERNATIVE;
	589	else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6))
	590	block->media_subtype = MST_SIGNED;
	591	else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9))
	592	block->media_subtype = MST_ENCRYPTED;
	593	else
	594	block->media_subtype = MST_OTHER;
	595
	596	if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) {
	597	if (!_ds_match_boundary (boundaries, boundary)) {
	598	_ds_push_boundary (boundaries, boundary);
	599	free(block->boundary);
	600	block->boundary = strdup (boundary);
	601	}
	602	} else {
	603	_ds_push_boundary (boundaries, "");
	604	}
	605	}
	606	else {
	607	block->media_type = MT_OTHER;
	608	block->media_subtype = MST_OTHER;
	609	}
	610
	611	}
	612
	613	/* Content-Transfer-Encoding */
	614
	615	else if (!strcasecmp (header->heading, "Content-Transfer-Encoding"))
	616	{
	617	if (!strncasecmp (header->data, "7bit", 4))
	618	block->encoding = EN_7BIT;
	619	else if (!strncasecmp (header->data, "8bit", 4))
	620	block->encoding = EN_8BIT;
	621	else if (!strncasecmp (header->data, "quoted-printable", 16))
	622	block->encoding = EN_QUOTED_PRINTABLE;
	623	else if (!strncasecmp (header->data, "base64", 6))
	624	block->encoding = EN_BASE64;
	625	else if (!strncasecmp (header->data, "binary", 6))
	626	block->encoding = EN_BINARY;
	627	else
	628	block->encoding = EN_OTHER;
	629	}
	630
	631	if (!strcasecmp (header->heading, "Content-Disposition"))
	632	{
	633	if (!strncasecmp (header->data, "inline", 6))
	634	block->content_disposition = PCD_INLINE;
	635	else if (!strncasecmp (header->data, "attachment", 10))
	636	block->content_disposition = PCD_ATTACHMENT;
	637	else
	638	block->content_disposition = PCD_OTHER;
	639	}
	640
	641	return;
	642	}
	643
	644	/*
	645	* _ds_destroy_message (ds_message_t message)
	646	*
	647	* DESCRIPTION
	648	* destroys a message structure (ds_message_t)
	649	*
	650	* INPUT ARGUMENTS
	651	* message the message structure to be destroyed
	652	*/
	653
	654	void
	655	_ds_destroy_message (ds_message_t message)
	656	{
	657	struct nt_node *node_nt;
	658	struct nt_c c;
	659
	660	if (message == NULL)
	661	return;
	662
	663	if (message->components) {
	664	node_nt = c_nt_first (message->components, &c);
	665	while (node_nt != NULL)
	666	{
	667	ds_message_part_t block = (ds_message_part_t) node_nt->ptr;
	668	_ds_destroy_block(block);
	669	node_nt = c_nt_next (message->components, &c);
	670	}
	671	nt_destroy (message->components);
	672	}
	673	free (message);
	674	return;
	675	}
	676
	677	/*
	678	* _ds_destroy_headers (ds_message_part_t block)
	679	*
	680	* DESCRIPTION
	681	* destroys a message block's header pairs
	682	* does not free the structures themselves; these are freed at nt_destroy
	683	*
	684	* INPUT ARGUMENTS
	685	* block the message block containing the headers to destsroy
	686	*/
	687
	688	void
	689	_ds_destroy_headers (ds_message_part_t block)
	690	{
	691	struct nt_node *node_nt;
	692	struct nt_c c;
	693
	694	if (!block \|\| !block->headers)
	695	return;
	696
	697	node_nt = c_nt_first (block->headers, &c);
	698	while (node_nt != NULL)
	699	{
	700	ds_header_t field = (ds_header_t) node_nt->ptr;
	701
	702	if (field)
	703	{
	704	free (field->original_data);
	705	free (field->heading);
	706	free (field->concatenated_data);
	707	free (field->data);
	708	}
	709	node_nt = c_nt_next (block->headers, &c);
	710	}
	711
	712	return;
	713	}
	714
	715	/*
	716	* _ds_destroy_block (ds_message_part_t block)
	717	*
	718	* DESCRIPTION
	719	* destroys a message block
	720	*
	721	* INPUT ARGUMENTS
	722	* block the message block to destroy
	723	*/
	724
	725	void
	726	_ds_destroy_block (ds_message_part_t block)
	727	{
	728	if (!block)
	729	return;
	730
	731	if (block->headers)
	732	{
	733	_ds_destroy_headers (block);
	734	nt_destroy (block->headers);
	735	}
	736	buffer_destroy (block->body);
	737	buffer_destroy (block->original_signed_body);
	738	free (block->boundary);
	739	free (block->terminating_boundary);
	740	// free (block);
	741	return;
	742	}
	743
	744	/*
	745	* _ds_decode_block (ds_message_part_t block)
	746	*
	747	* DESCRIPTION
	748	* decodes a message block
	749	*
	750	* INPUT ARGUMENTS
	751	* block the message block to decode
	752	*
	753	* RETURN VALUES
	754	* a pointer to the allocated character array containing the decoded message
	755	* NULL on failure
	756	*/
	757
	758	char *
	759	_ds_decode_block (ds_message_part_t block)
	760	{
	761	if (block->encoding == EN_BASE64)
	762	return _ds_decode_base64 (block->body->data);
	763	else if (block->encoding == EN_QUOTED_PRINTABLE)
	764	return _ds_decode_quoted (block->body->data);
	765
	766	LOG (LOG_WARNING, "decoding of block encoding type %d not supported",
	767	block->encoding);
	768	return NULL;
	769	}
	770
	771	/*
	772	* _ds_decode_{base64,quoted,hex8bit}
	773	*
	774	* DESCRIPTION
	775	* supporting block decoder functions
	776	* these function call (or perform) specific decoding functions
	777	*
	778	* INPUT ARGUMENTS
	779	* body encoded message body
	780	*
	781	* RETURN VALUES
	782	* a pointer to the allocated character array containing the decoded body
	783	*/
	784
	785	char *
	786	_ds_decode_base64 (const char *body)
	787	{
	788	if (body == NULL)
	789	return NULL;
	790
	791	return base64decode (body);
	792	}
	793
	794	char *
	795	_ds_decode_quoted (const char *body)
	796	{
	797	#ifdef VERBOSE
	798	LOGDEBUG("decoding Quoted Printable encoded buffer");
	799	#endif
	800	if (!body)
	801	return NULL;
	802
	803	char n, out;
	804	const char end, p;
	805
	806	n = out = malloc(strlen(body)+1);
	807	end = body + strlen(body);
	808
	809	if (out == NULL) {
	810	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	811	return NULL;
	812	}
	813
	814	for (p = body; p < end; p++, n++) {
	815	if (*p == '=') {
	816	if (p[1] == '\r' && p[2] == '\n') {
	817	n -= 1;
	818	p += 2;
	819	} else if (p[1] == '\n') {
	820	n -= 1;
	821	p += 1;
	822	} else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
	823	*n = ((_ds_hex2dec((unsigned char) p[1])) << 4) \| (_ds_hex2dec((unsigned char) p[2]));
	824	p += 2;
	825	} else
	826	n = p;
	827	} else
	828	n = p;
	829	}
	830
	831	*n = '\0';
	832	return (char *)out;
	833	}
	834
	835	char *
	836	_ds_decode_hex8bit (const char *body)
	837	{
	838	#ifdef VERBOSE
	839	LOGDEBUG("decoding hexadecimal 8-bit encodings in message block");
	840	#endif
	841	if (!body)
	842	return NULL;
	843
	844	char n, out;
	845	const char end, p;
	846
	847	n = out = malloc(strlen(body)+1);
	848	end = body + strlen(body);
	849
	850	if (out == NULL) {
	851	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	852	return NULL;
	853	}
	854
	855	for (p = body; p < end; p++, n++) {
	856	if (*p == '%')
	857	if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) {
	858	*n = ((_ds_hex2dec((unsigned char) p[1])) << 4) \| (_ds_hex2dec((unsigned char) p[2]));
	859	p += 2;
	860	} else
	861	n = p;
	862	else
	863	n = p;
	864	}
	865
	866	*n = '\0';
	867	return (char *)out;
	868	}
	869
	870	/*
	871	* _ds_encode_block (ds_message_part_t block, int encoding)
	872	*
	873	* DESCRIPTION
	874	* encodes a message block using the encoding specified and replaces the
	875	* block's message body with the encoded data
	876	*
	877	* INPUT ARGUMENTS
	878	* block the message block to encode
	879	* encoding encoding to use (EN_)
	880	*
	881	* RETURN VALUES
	882	* returns 0 on success
	883	*/
	884
	885	int
	886	_ds_encode_block (ds_message_part_t block, int encoding)
	887	{
	888	/* we can't encode a block with the same encoding */
	889
	890	if (block->encoding == encoding)
	891	return EINVAL;
	892
	893	/* we can't encode a block that's already encoded */
	894
	895	if (block->encoding == EN_BASE64 \|\| block->encoding == EN_QUOTED_PRINTABLE)
	896	return EFAILURE;
	897
	898	if (encoding == EN_BASE64) {
	899	char *encoded = _ds_encode_base64 (block->body->data);
	900	buffer_destroy (block->body);
	901	block->body = buffer_create (encoded);
	902	free (encoded);
	903	block->encoding = EN_BASE64;
	904	}
	905	else if (encoding == EN_QUOTED_PRINTABLE) {
	906
	907	/* TODO */
	908
	909	return 0;
	910	}
	911
	912	LOGDEBUG("unsupported encoding: %d", encoding);
	913	return 0;
	914	}
	915
	916	/*
	917	* _ds_encode_{base64,quoted}
	918	*
	919	* DESCRIPTION
	920	* supporting block encoder functions
	921	* these function call (or perform) specific encoding functions
	922	*
	923	* INPUT ARGUMENTS
	924	* body decoded message body
	925	*
	926	* RETURN VALUES
	927	* a pointer to the allocated character array containing the encoded body
	928	*/
	929
	930	char *
	931	_ds_encode_base64 (const char *body)
	932	{
	933	return base64encode (body);
	934	}
	935
	936	/*
	937	* _ds_assemble_message (ds_message_t message)
	938	*
	939	* DESCRIPTION
	940	* assembles a message structure into a flat text message
	941	*
	942	* INPUT ARGUMENTS
	943	* message the message structure (ds_message_t) to assemble
	944	*
	945	* RETURN VALUES
	946	* a pointer to the allocated character array containing the text message
	947	*/
	948
	949	char *
	950	_ds_assemble_message (ds_message_t message, const char *newline)
	951	{
	952	buffer *out = buffer_create (NULL);
	953	struct nt_node node_nt, node_header;
	954	struct nt_c c_nt, c_nt2;
	955	char *heading;
	956	char *copyback;
	957	#ifdef VERBOSE
	958	int i = 0;
	959	#endif
	960
	961	if (!out) {
	962	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	963	return NULL;
	964	}
	965
	966	node_nt = c_nt_first (message->components, &c_nt);
	967	while (node_nt != NULL && node_nt->ptr != NULL)
	968	{
	969	ds_message_part_t block =
	970	(ds_message_part_t) node_nt->ptr;
	971	#ifdef VERBOSE
	972	LOGDEBUG ("assembling component %d", i);
	973	#endif
	974
	975	/* Assemble headers */
	976
	977	if (block->headers != NULL && block->headers->items > 0)
	978	{
	979	node_header = c_nt_first (block->headers, &c_nt2);
	980	while (node_header != NULL)
	981	{
	982	char *data;
	983	ds_header_t current_header =
	984	(ds_header_t) node_header->ptr;
	985
	986	data = (current_header->original_data == NULL) ? current_header->data :
	987	current_header->original_data;
	988
	989	heading = malloc(
	990	((current_header->heading) ? strlen(current_header->heading) : 0)
	991	+ ((data) ? strlen(data) : 0)
	992	+ 3 + strlen(newline));
	993
	994	if (current_header->heading != NULL &&
	995	(!strncmp (current_header->heading, "From ", 5) \|\|
	996	!strncmp (current_header->heading, "--", 2)))
	997	sprintf (heading, "%s:%s%s",
	998	(current_header->heading) ? current_header->heading : "",
	999	(data) ? data : "", newline);
	1000	else
	1001	sprintf (heading, "%s: %s%s",
	1002	(current_header->heading) ? current_header->heading : "",
	1003	(data) ? data : "", newline);
	1004
	1005	buffer_cat (out, heading);
	1006	free(heading);
	1007	node_header = c_nt_next (block->headers, &c_nt2);
	1008	}
	1009	}
	1010
	1011	buffer_cat (out, newline);
	1012
	1013	/* Assemble bodies */
	1014
	1015	if (block->original_signed_body != NULL && message->protect)
	1016	buffer_cat (out, block->original_signed_body->data);
	1017	else
	1018	buffer_cat (out, block->body->data);
	1019
	1020	if (block->terminating_boundary != NULL)
	1021	{
	1022	buffer_cat (out, "--");
	1023	buffer_cat (out, block->terminating_boundary);
	1024	}
	1025
	1026	node_nt = c_nt_next (message->components, &c_nt);
	1027	#ifdef VERBOSE
	1028	i++;
	1029	#endif
	1030
	1031	if (node_nt != NULL && node_nt->ptr != NULL)
	1032	buffer_cat (out, newline);
	1033	}
	1034
	1035	copyback = out->data;
	1036	out->data = NULL;
	1037	buffer_destroy (out);
	1038	return copyback;
	1039	}
	1040
	1041	/*
	1042	* _ds_{push,pop,match,extract}_boundary
	1043	*
	1044	* DESCRIPTION
	1045	* these functions maintain and service a boundary "stack" on the message
	1046	*/
	1047
	1048	int
	1049	_ds_push_boundary (struct nt stack, const char boundary)
	1050	{
	1051	char *y;
	1052
	1053	if (boundary == NULL \|\| boundary[0] == 0)
	1054	return EINVAL;
	1055
	1056	y = malloc (strlen (boundary) + 3);
	1057	if (y == NULL)
	1058	return EUNKNOWN;
	1059
	1060	sprintf (y, "--%s", boundary);
	1061	nt_add (stack, (char *) y);
	1062	free(y);
	1063
	1064	return 0;
	1065	}
	1066
	1067	char *
	1068	_ds_pop_boundary (struct nt *stack)
	1069	{
	1070	struct nt_node node, last_node = NULL, *parent_node = NULL;
	1071	struct nt_c c;
	1072	char *boundary = NULL;
	1073
	1074	node = c_nt_first (stack, &c);
	1075	while (node != NULL)
	1076	{
	1077	parent_node = last_node;
	1078	last_node = node;
	1079	node = c_nt_next (stack, &c);
	1080	}
	1081	if (parent_node != NULL)
	1082	parent_node->next = NULL;
	1083	else
	1084	stack->first = NULL;
	1085
	1086	if (last_node == NULL)
	1087	return NULL;
	1088
	1089	boundary = strdup (last_node->ptr);
	1090
	1091	free (last_node->ptr);
	1092	free (last_node);
	1093
	1094	return boundary;
	1095	}
	1096
	1097	int
	1098	_ds_match_boundary (struct nt stack, const char buff)
	1099	{
	1100	struct nt_node *node;
	1101	struct nt_c c;
	1102
	1103	node = c_nt_first (stack, &c);
	1104	while (node != NULL)
	1105	{
	1106	if (!strncmp (buff, node->ptr, strlen (node->ptr)))
	1107	{
	1108	return 1;
	1109	}
	1110	node = c_nt_next (stack, &c);
	1111	}
	1112	return 0;
	1113	}
	1114
	1115	int
	1116	_ds_extract_boundary (char buf, size_t size, char mem)
	1117	{
	1118	char data, ptr, *ptrptr;
	1119
	1120	if (mem == NULL)
	1121	return EINVAL;
	1122
	1123	data = strdup(mem);
	1124	if (data == NULL) {
	1125	LOG(LOG_CRIT, ERR_MEM_ALLOC);
	1126	return EUNKNOWN;
	1127	}
	1128
	1129	for(ptr=data;ptr<(data+strlen(data));ptr++) {
	1130	if (!strncasecmp(ptr, "boundary", 8)) {
	1131	ptr = strchr(ptr, '=');
	1132	if (ptr == NULL) {
	1133	free(data);
	1134	return EFAILURE;
	1135	}
	1136	ptr++;
	1137	while(isspace((int) ptr[0]))
	1138	ptr++;
	1139	if (ptr[0] == '"')
	1140	ptr++;
	1141	strtok_r(ptr, " \";\n\t", &ptrptr);
	1142	strlcpy(buf, ptr, size);
	1143	free(data);
	1144	return 0;
	1145	}
	1146	}
	1147
	1148	free(data);
	1149	return EFAILURE;
	1150	}
	1151
	1152	/*
	1153	* _ds_find_header (ds_message_t message, consr char *heading) {
	1154	*
	1155	* DESCRIPTION
	1156	* finds a header and returns its value
	1157	*
	1158	* INPUT ARGUMENTS
	1159	* message the message structure to search
	1160	* heading the heading to search for
	1161	* flags optional search flags
	1162	*
	1163	* RETURN VALUES
	1164	* a pointer to the header structure's value
	1165	*
	1166	*/
	1167
	1168	char *
	1169	_ds_find_header (ds_message_t message, const char *heading) {
	1170	ds_message_part_t block;
	1171	ds_header_t head;
	1172	struct nt_node *node_nt;
	1173
	1174	if (message->components->first) {
	1175	if ((block = message->components->first->ptr)==NULL)
	1176	return NULL;
	1177	if (block->headers == NULL)
	1178	return NULL;
	1179	} else {
	1180	return NULL;
	1181	}
	1182
	1183	node_nt = block->headers->first;
	1184	while(node_nt != NULL) {
	1185	head = (ds_header_t) node_nt->ptr;
	1186	if (head && !strcasecmp(head->heading, heading)) {
	1187	return head->data;
	1188	}
	1189	node_nt = node_nt->next;
	1190	}
	1191
	1192	return NULL;
	1193	}
	1194
	1195	int _ds_hex2dec(unsigned char hex) {
	1196	switch (hex) {
	1197	case '0': return 0;
	1198	case '1': return 1;
	1199	case '2': return 2;
	1200	case '3': return 3;
	1201	case '4': return 4;
	1202	case '5': return 5;
	1203	case '6': return 6;
	1204	case '7': return 7;
	1205	case '8': return 8;
	1206	case '9': return 9;
	1207	case 'a': case 'A': return 10;
	1208	case 'b': case 'B': return 11;
	1209	case 'c': case 'C': return 12;
	1210	case 'd': case 'D': return 13;
	1211	case 'e': case 'E': return 14;
	1212	case 'f': case 'F': return 15;
	1213	default: return -1;
	1214	}
	1215	}
	1216
	1217	/*
	1218	* _ds_strip_html(const char *html)
	1219	*
	1220	* DESCRIPTION
	1221	* strip html tags from the supplied message
	1222	*
	1223	* INPUT ARGUMENTS
	1224	* html encoded message body
	1225	*
	1226	* RETURN VALUES
	1227	* a pointer to the allocated character array containing the
	1228	* stripped message
	1229	*
	1230	*/
	1231
	1232	char *
	1233	_ds_strip_html (const char *html)
	1234	{
	1235	#ifdef VERBOSE
	1236	LOGDEBUG("stripping HTML tags from message block");
	1237	#endif
	1238	size_t j = 0, k = 0, i = 0;
	1239	int visible = 1;
	1240	int closing_td_tag = 0;
	1241	char *html2;
	1242	const char *cdata_close_tag = NULL;
	1243
	1244	if(!html)
	1245	return NULL;
	1246
	1247	static struct {
	1248	unsigned int id;
	1249	char *entity;
	1250	}
	1251	charset[] = {
	1252	{ 32, " " }, { 34, """ }, { 34, """ }, { 38, "&" },
	1253	{ 38, "&" }, { 39, "'" }, { 60, "<" }, { 60, "<" },
	1254	{ 62, ">" }, { 62, ">" }, { 160, " " }, { 161, "¡" },
	1255	{ 162, "¢" }, { 163, "£" }, { 164, "¤" }, { 165, "¥" },
	1256	{ 166, "¦" }, { 167, "§" }, { 168, "¨" }, { 169, "©" },
	1257	{ 170, "ª" }, { 171, "«" }, { 172, "¬" }, { 173, "" },
	1258	{ 174, "®" }, { 175, "¯" }, { 176, "°" }, { 177, "±" },
	1259	{ 178, "²" }, { 179, "³" }, { 180, "´" }, { 181, "µ" },
	1260	{ 182, "¶" }, { 183, "·" }, { 184, "¸" }, { 185, "¹" },
	1261	{ 186, "º" }, { 187, "»" }, { 188, "¼" }, { 189, "½" },
	1262	{ 190, "¾" }, { 191, "¿" }, { 192, "À" }, { 193, "Á" },
	1263	{ 194, "Â" }, { 195, "Ã" }, { 196, "Ä" }, { 197, "Å" },
	1264	{ 198, "Æ" }, { 199, "Ç" }, { 200, "È" }, { 201, "É" },
	1265	{ 202, "Ê" }, { 203, "Ë" }, { 204, "Ì" }, { 205, "Í" },
	1266	{ 206, "Î" }, { 207, "Ï" }, { 208, "Ð" }, { 209, "Ñ" },
	1267	{ 210, "Ò" }, { 211, "Ó" }, { 212, "Ô" }, { 213, "Õ" },
	1268	{ 214, "Ö" }, { 215, "×" }, { 216, "Ø" }, { 217, "Ù" },
	1269	{ 218, "Ú" }, { 219, "Û" }, { 220, "Ü" }, { 221, "Ý" },
	1270	{ 222, "Þ" }, { 223, "ß" }, { 224, "à" }, { 225, "á" },
	1271	{ 226, "â" }, { 227, "ã" }, { 228, "ä" }, { 229, "å" },
	1272	{ 230, "æ" }, { 231, "ç" }, { 232, "è" }, { 233, "é" },
	1273	{ 234, "ê" }, { 235, "ë" }, { 236, "ì" }, { 237, "í" },
	1274	{ 238, "î" }, { 239, "ï" }, { 240, "ð" }, { 241, "ñ" },
	1275	{ 242, "ò" }, { 243, "ó" }, { 244, "ô" }, { 245, "õ" },
	1276	{ 246, "ö" }, { 247, "÷" }, { 248, "ø" }, { 249, "ù" },
	1277	{ 250, "ú" }, { 251, "û" }, { 252, "ü" }, { 253, "ý" },
	1278	{ 254, "þ" }, { 255, "ÿ" }, { 338, "&OElig;" }, { 339, "&oelig;" },
	1279	{ 352, "&Scaron;" }, { 353, "&scaron;" }, { 376, "&Yuml;" }, { 402, "&fnof;" },
	1280	{ 710, "&circ;" }, { 732, "&tilde;" }, { 913, "Α" }, { 914, "Β" },
	1281	{ 915, "Γ" }, { 916, "Δ" }, { 917, "Ε" }, { 918, "Ζ" },
	1282	{ 919, "Η" }, { 920, "Θ" }, { 921, "Ι" }, { 922, "Κ" },
	1283	{ 923, "Λ" }, { 924, "Μ" }, { 925, "Ν" }, { 926, "Ξ" },
	1284	{ 927, "Ο" }, { 928, "Π" }, { 929, "Ρ" }, { 931, "Σ" },
	1285	{ 932, "Τ" }, { 933, "Υ" }, { 934, "Φ" }, { 935, "Χ" },
	1286	{ 936, "Ψ" }, { 937, "Ω" }, { 945, "α" }, { 946, "β" },
	1287	{ 947, "γ" }, { 948, "δ" }, { 949, "ε" }, { 950, "ζ" },
	1288	{ 951, "η" }, { 952, "θ" }, { 953, "ι" }, { 954, "κ" },
	1289	{ 955, "λ" }, { 956, "μ" }, { 957, "ν" }, { 958, "ξ" },
	1290	{ 959, "ο" }, { 960, "π" }, { 961, "ρ" }, { 962, "&sigmaf;" },
	1291	{ 963, "σ" }, { 964, "τ" }, { 965, "υ" }, { 966, "φ" },
	1292	{ 967, "χ" }, { 968, "ψ" }, { 969, "ω" }, { 977, "&thetasym" },
	1293	{ 978, "&upsih;" }, { 982, "ϖ" }, {8194, "&ensp;" }, {8195, "&emsp;" },
	1294	{ 8201, " " }, {8204, "&zwnj;" }, {8205, "&zwj;" }, {8206, "&lrm;" },
	1295	{ 8207, "&rlm;" }, {8211, "–" }, {8212, "—" }, {8216, "‘" },
	1296	{ 8217, "’" }, {8218, "&sbquo;" }, {8220, "“" }, {8221, "”" },
	1297	{ 8222, "&bdquo;" }, {8224, "&dagger;" }, {8225, "&Dagger;" }, {8226, "•" },
	1298	{ 8230, "…" }, {8240, "&permil;" }, {8242, "′" }, {8243, "″" },
	1299	{ 8249, "&lsaquo;" }, {8250, "&rsaquo;" }, {8254, "&oline;" }, {8260, "&frasl;" },
	1300	{ 8364, "€" }, {8465, "&image;" }, {8472, "&weierp;" }, {8476, "&real;" },
	1301	{ 8482, "™" }, {8501, "&alefsym;" }, {8592, "←" }, {8593, "↑" },
	1302	{ 8594, "→" }, {8595, "↓" }, {8596, "↔" }, {8629, "&crarr;" },
	1303	{ 8656, "⇐" }, {8657, "&uArr;" }, {8658, "⇒" }, {8659, "&dArr;" },
	1304	{ 8660, "⇔" }, {8704, "∀" }, {8706, "∂" }, {8707, "∃" },
	1305	{ 8709, "∅" }, {8711, "∇" }, {8712, "∈" }, {8713, "∉" },
	1306	{ 8715, "&ni;" }, {8719, "∏" }, {8721, "∑" }, {8722, "−" },
	1307	{ 8727, "&lowast;" }, {8730, "√" }, {8733, "&prop;" }, {8734, "∞" },
	1308	{ 8736, "&ang;" }, {8743, "&and;" }, {8744, "&or;" }, {8745, "∩" },
	1309	{ 8746, "∪" }, {8747, "∫" }, {8756, "&there4;" }, {8764, "&sim;" },
	1310	{ 8773, "&cong;" }, {8776, "≈" }, {8800, "≠" }, {8801, "&equiv;" },
	1311	{ 8804, "≤" }, {8805, "≥" }, {8834, "⊂" }, {8835, "⊃" },
	1312	{ 8836, "&nsub;" }, {8838, "&sube;" }, {8839, "&supe;" }, {8853, "&oplus;" },
	1313	{ 8855, "&otimes;" }, {8869, "&perp;" }, {8901, "⋅" }, {8968, "&lceil;" },
	1314	{ 8969, "&rceil;" }, {8970, "&lfloor;" }, {8971, "&rfloor;" }, {9001, "&lang;" },
	1315	{ 9002, "&rang;" }, {9674, "&loz;" }, {9824, "&spades;" }, {9827, "&clubs;" },
	1316	{ 9829, "&hearts;" }, {9830, "&diams;" }
	1317	};
	1318	int num_chars = sizeof(charset) / sizeof(charset[0]);
	1319
	1320	static struct {
	1321	char *open_tag;
	1322	char *uri_tag;
	1323	}
	1324	uritag[] = {
	1325	{ "<a", "href" }, { "<img", "src" }, { "<input", "src" },
	1326	{ "<iframe", "src" }, { "<frame", "src" }, { "<script", "src" },
	1327	{ "<form", "action" }, { "<embed", "src" }, { "<area", "href" },
	1328	{ "<base", "href" }, { "<link", "href" }, { "<source", "src" },
	1329	{ "<body", "background" }, { "<blockquote", "cite" }, { "<q", "cite" },
	1330	{ "<ins", "cite" }, { "<del", "cite" }
	1331	};
	1332	int num_uri = sizeof(uritag) / sizeof(uritag[0]);
	1333
	1334	size_t len = strlen(html);
	1335	html2 = malloc(len+1);
	1336
	1337	if (html2 == NULL) {
	1338	LOG (LOG_CRIT, ERR_MEM_ALLOC);
	1339	return NULL;
	1340	}
	1341
	1342	for (i = 0; i < len; i++) {
	1343	if (html[i] == '<') {
	1344	if (cdata_close_tag) {
	1345	if (strncasecmp(html + i, cdata_close_tag, strlen(cdata_close_tag)) == 0) {
	1346	i += strlen(cdata_close_tag) - 1;
	1347	cdata_close_tag = NULL;
	1348	}
	1349	continue;
	1350	} else if (strncasecmp(html + i, "</td>", 5) == 0) {
	1351	i += 4;
	1352	closing_td_tag = 1;
	1353	continue;
	1354	} else if (strncasecmp(html + i, "<td", 3) == 0 && closing_td_tag) {
	1355	if (j > 0 && !isspace(html2[j-1])) {
	1356	html2[j++]=' ';
	1357	}
	1358	visible = 0;
	1359	} else {
	1360	closing_td_tag = 0;
	1361	visible = 1;
	1362	}
	1363	k = i + 1;
	1364
	1365	if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) \|\|
	1366	(html[k] >= 97 && html[k] <= 122) \|\|
	1367	(html[k] == 47) \|\|
	1368	(html[k] == 33) ))) {
	1369	/* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */
	1370	visible = 1;
	1371	html2[j++]=html[i];
	1372	i = k;
	1373	const char *w = &(html[k]);
	1374	while (j < len && (size_t)(w - html) < len && *w != '<') {
	1375	html2[j++]=*w;
	1376	w++;
	1377	i++;
	1378	}
	1379	continue;
	1380	} else if (html[k]) {
	1381	/* find the end of the tag */
	1382	while (k < len && html[k] != '<' && html[k] != '>') {k++;}
	1383
	1384	/* if we've got a tag with a uri, save the address to print later. */
	1385	char *url_tag = " ";
	1386	int tag_offset = 0, x = 0, y = 0;
	1387	for (y = 0; y < num_uri; y++) {
	1388	x = strlen(uritag[y].open_tag);
	1389	if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) {
	1390	url_tag = uritag[y].uri_tag;
	1391	tag_offset = i + x + 1;
	1392	break;
	1393	}
	1394	}
	1395	/* tag with uri found */
	1396	if (tag_offset > 0) {
	1397	size_t url_start; /* start of url tag inclusive [ */
	1398	size_t url_tag_len = strlen(url_tag);
	1399	char delim = ' ';
	1400	/* find start of uri */
	1401	for (url_start = tag_offset; url_start <= k; url_start++) {
	1402	if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) {
	1403	url_start += url_tag_len;
	1404	while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces before = */
	1405	if (html[url_start] == '=') {
	1406	url_start++;
	1407	while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */
	1408	if (html[url_start] == '"') {
	1409	delim = '"';
	1410	url_start++;
	1411	} else if (html[url_start] == '\'') {
	1412	delim = '\'';
	1413	url_start++;
	1414	} else {
	1415	delim = '>';
	1416	}
	1417	break;
	1418	} else {
	1419	/* Start of uri tag found but no '=' after the tag.
	1420	* Skip the whole tag.
	1421	*/
	1422	break;
	1423	}
	1424	} else if ((url_start - tag_offset) >= 50) {
	1425	/* The length of the html tag is over 50 characters long without
	1426	* finding the start of the url/uri. Skip the whole tag.
	1427	*/
	1428	break;
	1429	}
	1430	}
	1431	/* find end of uri */
	1432	if (delim != ' ') {
	1433	if (url_start < len &&
	1434	(strncasecmp(html + url_start, "http:", 5) == 0 \|\|
	1435	strncasecmp(html + url_start, "https:", 6) == 0 \|\|
	1436	strncasecmp(html + url_start, "ftp:", 4) == 0)) {
	1437	html2[j++]=' ';
	1438	const char *w = &(html[url_start]);
	1439	/* html2 is a buffer of len + 1, where the +1 is for NULL
	1440	* termination. This means we only want to loop to len
	1441	* since we will replace html2[j] right after the loop.
	1442	*/
	1443	while (j < len && (size_t)(w - html) < len && *w != delim) {
	1444	html2[j++]=*w;
	1445	w++;
	1446	}
	1447	html2[j++]=' ';
	1448	}
	1449	}
	1450	} else if (strncasecmp(html + i, "<p>", 3) == 0
	1451	\|\| strncasecmp(html + i, "<p ", 3) == 0
	1452	\|\| strncasecmp(html + i, "<p\t", 3) == 0
	1453	\|\| strncasecmp(html + i, "<tr", 3) == 0
	1454	\|\| strncasecmp(html + i, "<option", 7) == 0
	1455	\|\| strncasecmp(html + i, "<br", 3) == 0
	1456	\|\| strncasecmp(html + i, "<li", 3) == 0
	1457	\|\| strncasecmp(html + i, "<div", 4) == 0
	1458	\|\| strncasecmp(html + i, "</select>", 9) == 0
	1459	\|\| strncasecmp(html + i, "</table>", 8) == 0) {
	1460	if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') {
	1461	html2[j++] = '\n';
	1462	}
	1463	} else if (strncasecmp(html + i, "<applet", 7) == 0) {
	1464	cdata_close_tag = "</applet>";
	1465	} else if (strncasecmp(html + i, "<embed", 6) == 0) {
	1466	cdata_close_tag = "</embed>";
	1467	} else if (strncasecmp(html + i, "<frameset", 9) == 0) {
	1468	cdata_close_tag = "</frameset>";
	1469	} else if (strncasecmp(html + i, "<frame", 6) == 0) {
	1470	cdata_close_tag = "</frame>";
	1471	} else if (strncasecmp(html + i, "<iframe", 7) == 0) {
	1472	cdata_close_tag = "</iframe>";
	1473	} else if (strncasecmp(html + i, "<noembed", 8) == 0) {
	1474	cdata_close_tag = "</noembed>";
	1475	} else if (strncasecmp(html + i, "<noscript", 9) == 0) {
	1476	cdata_close_tag = "</noscript>";
	1477	} else if (strncasecmp(html + i, "<object", 7) == 0) {
	1478	cdata_close_tag = "</object>";
	1479	} else if (strncasecmp(html + i, "<script", 7) == 0) {
	1480	cdata_close_tag = "</script>";
	1481	} else if (strncasecmp(html + i, "<style", 6) == 0) {
	1482	cdata_close_tag = "</style>";
	1483	}
	1484	i = (html[k] == '<' \|\| html[k] == '\0')? k - 1: k;
	1485	continue;
	1486	}
	1487	} else if (cdata_close_tag) {
	1488	continue;
	1489	} else if (!isspace(html[i])) {
	1490	visible = 1;
	1491	}
	1492
	1493	if (strncmp(html+i,"&#",2)==0) {
	1494	int x = 0;
	1495	const char *w = &(html[i+2]);
	1496	while (*w == '0') {i++;w++;}
	1497	char n[5];
	1498	if (html[i+4] && html[i+4] == ';'
	1499	&& isdigit(html[i+2])
	1500	&& isdigit(html[i+3])) {
	1501	n[0] = html[i+2];
	1502	n[1] = html[i+3];
	1503	n[2] = 0;
	1504	x = atoi(n);
	1505	if (x <= 255 && x >= 32)
	1506	html2[j++] = x;
	1507	i += 4;
	1508	} else if (html[i+6]
	1509	&& html[i+6] == ';'
	1510	&& isdigit(html[i+2])
	1511	&& isdigit(html[i+3])
	1512	&& isdigit(html[i+4])
	1513	&& isdigit(html[i+5])) {
	1514	n[0] = html[i+2];
	1515	n[1] = html[i+3];
	1516	n[2] = html[i+4];
	1517	n[3] = html[i+5];
	1518	n[4] = 0;
	1519	x = atoi(n);
	1520	if (x <= 255 && x >= 32)
	1521	html2[j++] = x;
	1522	i += 6;
	1523	} else {
	1524	const char *w = &(html[i]);
	1525	while (w != ';' && w != ' ' && w != '\t' && w != '\0') {i++;w++;}
	1526	}
	1527	visible = 0;
	1528	continue;
	1529	} else if (html[i] == '&') {
	1530	int x = 0, y = 0;
	1531	for (y = 0; y < num_chars; y++) {
	1532	x = strlen(charset[y].entity);
	1533	if (strncasecmp(html+i,charset[y].entity,x)==0) {
	1534	if (charset[y].id <= 255)
	1535	html2[j++] = charset[y].id;
	1536	i += x-1;
	1537	visible = 0;
	1538	continue;
	1539	}
	1540	}
	1541	}
	1542
	1543	if (j < len && visible)
	1544	html2[j++] = html[i];
	1545
	1546	if (j >= len)
	1547	i = j = len;
	1548	}
	1549
	1550	html2[j] = '\0';
	1551	return (char *)html2;
	1552	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: