1 | /* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */ |
---|
2 | |
---|
3 | /* |
---|
4 | DSPAM |
---|
5 | COPYRIGHT (C) 2002-2012 DSPAM PROJECT |
---|
6 | |
---|
7 | This program is free software: you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU Affero General Public License as |
---|
9 | published by the Free Software Foundation, either version 3 of the |
---|
10 | License, or (at your option) any later version. |
---|
11 | |
---|
12 | This program is distributed in the hope that it will be useful, |
---|
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | GNU Affero General Public License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU Affero General Public License |
---|
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
19 | |
---|
20 | */ |
---|
21 | |
---|
22 | /* |
---|
23 | * decode.c - message decoding and parsing |
---|
24 | * |
---|
25 | * DESCRIPTION |
---|
26 | * This set of functions performs parsing and decoding of a message and |
---|
27 | * embeds its components into a ds_message_t structure, suitable for |
---|
28 | * logical access. |
---|
29 | */ |
---|
30 | |
---|
31 | #ifdef HAVE_CONFIG_H |
---|
32 | #include <auto-config.h> |
---|
33 | #endif |
---|
34 | |
---|
35 | #include <stdio.h> |
---|
36 | #include <string.h> |
---|
37 | #include <stdlib.h> |
---|
38 | #include <ctype.h> |
---|
39 | |
---|
40 | #include "decode.h" |
---|
41 | #include "error.h" |
---|
42 | #include "util.h" |
---|
43 | #include "language.h" |
---|
44 | #include "buffer.h" |
---|
45 | #include "base64.h" |
---|
46 | #include "libdspam.h" |
---|
47 | |
---|
48 | /* |
---|
49 | * _ds_actualize_message (const char *message) |
---|
50 | * |
---|
51 | * DESCRIPTION |
---|
52 | * primary message parser |
---|
53 | * |
---|
54 | * this function performs all decoding and actualization of the message |
---|
55 | * into the message structures defined in the .h |
---|
56 | * |
---|
57 | * INPUT ARGUMENTS |
---|
58 | * message message to decode |
---|
59 | * |
---|
60 | * RETURN VALUES |
---|
61 | * pointer to an allocated message structure (ds_message_t), NULL on failure |
---|
62 | */ |
---|
63 | |
---|
64 | ds_message_t |
---|
65 | _ds_actualize_message (const char *message) |
---|
66 | { |
---|
67 | char *line = NULL; |
---|
68 | char *in = NULL; |
---|
69 | char *m_in = NULL; |
---|
70 | ds_message_part_t current_block; |
---|
71 | ds_header_t current_heading = NULL; |
---|
72 | struct nt *boundaries = NULL; |
---|
73 | ds_message_t out = NULL; |
---|
74 | int block_position = BP_HEADER; |
---|
75 | int in_content = 0; |
---|
76 | |
---|
77 | if (!message || !(*message)) |
---|
78 | goto MEMFAIL; |
---|
79 | |
---|
80 | if (!(in = strdup(message))) |
---|
81 | goto MEMFAIL; |
---|
82 | |
---|
83 | m_in = in; |
---|
84 | |
---|
85 | boundaries = nt_create (NT_CHAR); |
---|
86 | if (!boundaries) |
---|
87 | goto MEMFAIL; |
---|
88 | |
---|
89 | out = (ds_message_t) calloc (1, sizeof (struct _ds_message)); |
---|
90 | if (!out) |
---|
91 | goto MEMFAIL; |
---|
92 | |
---|
93 | out->components = nt_create (NT_PTR); |
---|
94 | if (!out->components) |
---|
95 | goto MEMFAIL; |
---|
96 | |
---|
97 | current_block = _ds_create_message_part (); |
---|
98 | if (!current_block) |
---|
99 | goto MEMFAIL; |
---|
100 | |
---|
101 | if (nt_add (out->components, (void *) current_block) == NULL) |
---|
102 | goto MEMFAIL; |
---|
103 | |
---|
104 | /* Read the message from memory */ |
---|
105 | |
---|
106 | line = strsep (&in, "\n"); |
---|
107 | while (line) |
---|
108 | { |
---|
109 | |
---|
110 | /* Header processing */ |
---|
111 | |
---|
112 | if (block_position == BP_HEADER) |
---|
113 | { |
---|
114 | |
---|
115 | /* If we see two boundaries converged on top of one another */ |
---|
116 | |
---|
117 | if (_ds_match_boundary (boundaries, line)) |
---|
118 | { |
---|
119 | |
---|
120 | /* Add the boundary as the terminating boundary */ |
---|
121 | |
---|
122 | current_block->terminating_boundary = strdup (line + 2); |
---|
123 | current_block->original_encoding = current_block->encoding; |
---|
124 | |
---|
125 | _ds_decode_headers(current_block); |
---|
126 | current_block = _ds_create_message_part (); |
---|
127 | |
---|
128 | if (!current_block) |
---|
129 | goto MEMFAIL; |
---|
130 | |
---|
131 | if (nt_add (out->components, (void *) current_block) == NULL) |
---|
132 | goto MEMFAIL; |
---|
133 | |
---|
134 | block_position = BP_HEADER; |
---|
135 | } |
---|
136 | |
---|
137 | /* Concatenate multiline headers to the original header field data */ |
---|
138 | |
---|
139 | else if (line[0] == 32 || line[0] == '\t') |
---|
140 | { |
---|
141 | if (current_heading) |
---|
142 | { |
---|
143 | char *eow, *ptr; |
---|
144 | |
---|
145 | ptr = realloc (current_heading->data, |
---|
146 | strlen (current_heading->data) + strlen (line) + 2); |
---|
147 | if (ptr) |
---|
148 | { |
---|
149 | current_heading->data = ptr; |
---|
150 | strcat (current_heading->data, "\n"); |
---|
151 | strcat (current_heading->data, line); |
---|
152 | } else { |
---|
153 | goto MEMFAIL; |
---|
154 | } |
---|
155 | |
---|
156 | /* Our concatenated data doesn't have any whitespace between lines */ |
---|
157 | for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { } |
---|
158 | |
---|
159 | ptr = |
---|
160 | realloc (current_heading->concatenated_data, |
---|
161 | strlen (current_heading->concatenated_data) + strlen (eow) + 1); |
---|
162 | if (ptr) |
---|
163 | { |
---|
164 | current_heading->concatenated_data = ptr; |
---|
165 | strcat (current_heading->concatenated_data, eow); |
---|
166 | } else { |
---|
167 | goto MEMFAIL; |
---|
168 | } |
---|
169 | |
---|
170 | if (current_heading->original_data) { |
---|
171 | ptr = |
---|
172 | realloc (current_heading->original_data, |
---|
173 | strlen (current_heading->original_data) + |
---|
174 | strlen (line) + 2); |
---|
175 | if (ptr) { |
---|
176 | current_heading->original_data = ptr; |
---|
177 | strcat (current_heading->original_data, "\n"); |
---|
178 | strcat (current_heading->original_data, line); |
---|
179 | } else { |
---|
180 | goto MEMFAIL; |
---|
181 | } |
---|
182 | } |
---|
183 | |
---|
184 | _ds_analyze_header (current_block, current_heading, boundaries); |
---|
185 | } |
---|
186 | } |
---|
187 | |
---|
188 | /* New header field when LF or CRLF is not found */ |
---|
189 | |
---|
190 | else if (line[0] != 0 && line[0] != 13) |
---|
191 | { |
---|
192 | ds_header_t header = _ds_create_header_field (line); |
---|
193 | |
---|
194 | if (header != NULL) |
---|
195 | { |
---|
196 | _ds_analyze_header (current_block, header, boundaries); |
---|
197 | current_heading = header; |
---|
198 | nt_add (current_block->headers, header); |
---|
199 | } |
---|
200 | |
---|
201 | |
---|
202 | /* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */ |
---|
203 | |
---|
204 | } else { |
---|
205 | block_position = BP_BODY; |
---|
206 | } |
---|
207 | } |
---|
208 | |
---|
209 | /* Body processing */ |
---|
210 | |
---|
211 | else if (block_position == BP_BODY) |
---|
212 | { |
---|
213 | /* Look for a boundary in the header of a part */ |
---|
214 | |
---|
215 | if (!strncasecmp (line, "Content-Type", 12) |
---|
216 | || ((line[0] == 32 || line[0] == 9) && in_content)) |
---|
217 | { |
---|
218 | char boundary[128]; |
---|
219 | in_content = 1; |
---|
220 | if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) { |
---|
221 | if (!_ds_match_boundary (boundaries, boundary)) { |
---|
222 | _ds_push_boundary (boundaries, boundary); |
---|
223 | free(current_block->boundary); |
---|
224 | current_block->boundary = strdup (boundary); |
---|
225 | } |
---|
226 | } else { |
---|
227 | _ds_push_boundary (boundaries, ""); |
---|
228 | } |
---|
229 | } else { |
---|
230 | in_content = 0; |
---|
231 | } |
---|
232 | |
---|
233 | /* Multipart boundary was reached; move onto next block */ |
---|
234 | |
---|
235 | if (_ds_match_boundary (boundaries, line)) |
---|
236 | { |
---|
237 | |
---|
238 | /* Add the boundary as the terminating boundary */ |
---|
239 | |
---|
240 | current_block->terminating_boundary = strdup (line + 2); |
---|
241 | current_block->original_encoding = current_block->encoding; |
---|
242 | |
---|
243 | _ds_decode_headers(current_block); |
---|
244 | current_block = _ds_create_message_part (); |
---|
245 | |
---|
246 | if (!current_block) |
---|
247 | goto MEMFAIL; |
---|
248 | |
---|
249 | if (nt_add (out->components, (void *) current_block) == NULL) |
---|
250 | goto MEMFAIL; |
---|
251 | |
---|
252 | block_position = BP_HEADER; |
---|
253 | } |
---|
254 | |
---|
255 | /* Plain old message (or part) body */ |
---|
256 | |
---|
257 | else { |
---|
258 | buffer_cat (current_block->body, line); |
---|
259 | |
---|
260 | /* Don't add extra \n at the end of message's body */ |
---|
261 | |
---|
262 | if (in != NULL) |
---|
263 | buffer_cat (current_block->body, "\n"); |
---|
264 | } |
---|
265 | } |
---|
266 | |
---|
267 | line = strsep (&in, "\n"); |
---|
268 | } /* while (line) */ |
---|
269 | |
---|
270 | _ds_decode_headers(current_block); |
---|
271 | |
---|
272 | free (m_in); |
---|
273 | nt_destroy (boundaries); |
---|
274 | return out; |
---|
275 | |
---|
276 | MEMFAIL: |
---|
277 | if (m_in) free(m_in); |
---|
278 | if (boundaries) nt_destroy (boundaries); |
---|
279 | if (out) _ds_destroy_message(out); |
---|
280 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
281 | return NULL; |
---|
282 | } |
---|
283 | |
---|
284 | /* |
---|
285 | * _ds_create_message_part |
---|
286 | * |
---|
287 | * DESCRIPTION |
---|
288 | * create and initialize a new message block component |
---|
289 | * |
---|
290 | * RETURN VALUES |
---|
291 | * pointer to an allocated message block (ds_message_part_t), NULL on failure |
---|
292 | * |
---|
293 | */ |
---|
294 | |
---|
295 | ds_message_part_t |
---|
296 | _ds_create_message_part (void) |
---|
297 | { |
---|
298 | ds_message_part_t block = |
---|
299 | (ds_message_part_t) calloc (1, sizeof (struct _ds_message_part)); |
---|
300 | |
---|
301 | if (!block) |
---|
302 | goto MEMFAIL; |
---|
303 | |
---|
304 | block->headers = nt_create (NT_PTR); |
---|
305 | if (!block->headers) |
---|
306 | goto MEMFAIL; |
---|
307 | |
---|
308 | block->body = buffer_create (NULL); |
---|
309 | if (!block->body) |
---|
310 | goto MEMFAIL; |
---|
311 | |
---|
312 | block->encoding = EN_UNKNOWN; |
---|
313 | block->media_type = MT_TEXT; |
---|
314 | block->media_subtype = MST_PLAIN; |
---|
315 | block->original_encoding = EN_UNKNOWN; |
---|
316 | block->content_disposition = PCD_UNKNOWN; |
---|
317 | |
---|
318 | /* Not really necessary, but.. */ |
---|
319 | |
---|
320 | block->boundary = NULL; |
---|
321 | block->terminating_boundary = NULL; |
---|
322 | block->original_signed_body = NULL; |
---|
323 | |
---|
324 | |
---|
325 | return block; |
---|
326 | |
---|
327 | MEMFAIL: |
---|
328 | if (block) { |
---|
329 | buffer_destroy(block->body); |
---|
330 | nt_destroy(block->headers); |
---|
331 | free(block); |
---|
332 | } |
---|
333 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
334 | return NULL; |
---|
335 | } |
---|
336 | |
---|
337 | /* |
---|
338 | * _ds_create_header_field(const char *heading) |
---|
339 | * |
---|
340 | * DESCRIPTION |
---|
341 | * create and initialize a new header structure |
---|
342 | * |
---|
343 | * INPUT ARGUMENTS |
---|
344 | * heading plain text heading (e.g. "To: Mom") |
---|
345 | * |
---|
346 | * RETURN VALUES |
---|
347 | * pointer to an allocated header structure (ds_header_t), NULL on failure |
---|
348 | */ |
---|
349 | |
---|
350 | ds_header_t |
---|
351 | _ds_create_header_field (const char *heading) |
---|
352 | { |
---|
353 | char *in = strdup(heading); |
---|
354 | char *ptr, *m = in, *data; |
---|
355 | ds_header_t header = |
---|
356 | (ds_header_t) calloc (1, sizeof (struct _ds_header_field)); |
---|
357 | |
---|
358 | if (!header || !in) |
---|
359 | goto MEMFAIL; |
---|
360 | |
---|
361 | ptr = strsep (&in, ":"); |
---|
362 | if (ptr) { |
---|
363 | header->heading = strdup (ptr); |
---|
364 | if (!header->heading) |
---|
365 | goto MEMFAIL; |
---|
366 | else |
---|
367 | { |
---|
368 | if (!in) |
---|
369 | { |
---|
370 | LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't " |
---|
371 | "contains `:' character", __FILE__, __LINE__, header->heading); |
---|
372 | |
---|
373 | /* Use empty string as data as fallback for comtinue processing. */ |
---|
374 | |
---|
375 | in = ""; |
---|
376 | } |
---|
377 | else |
---|
378 | { |
---|
379 | /* Skip white space */ |
---|
380 | while (*in == 32 || *in == 9) |
---|
381 | ++in; |
---|
382 | } |
---|
383 | |
---|
384 | data = strdup (in); |
---|
385 | if (!data) |
---|
386 | goto MEMFAIL; |
---|
387 | |
---|
388 | header->data = data; |
---|
389 | header->concatenated_data = strdup(data); |
---|
390 | } |
---|
391 | } |
---|
392 | |
---|
393 | free (m); |
---|
394 | return header; |
---|
395 | |
---|
396 | MEMFAIL: |
---|
397 | free(header); |
---|
398 | free(m); |
---|
399 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
400 | return NULL; |
---|
401 | } |
---|
402 | |
---|
403 | /* |
---|
404 | * _ds_decode_headers (ds_message_part_t block) |
---|
405 | * |
---|
406 | * DESCRIPTION |
---|
407 | * decodes in-line encoded headers |
---|
408 | * |
---|
409 | * RETURN VALUES |
---|
410 | * returns 0 on success |
---|
411 | */ |
---|
412 | |
---|
413 | int |
---|
414 | _ds_decode_headers (ds_message_part_t block) { |
---|
415 | #ifdef VERBOSE |
---|
416 | LOGDEBUG("decoding headers in message block"); |
---|
417 | #endif |
---|
418 | char *ptr, *dptr, *rest, *enc; |
---|
419 | ds_header_t header; |
---|
420 | struct nt_node *node_nt; |
---|
421 | struct nt_c c_nt; |
---|
422 | long decoded_len; |
---|
423 | |
---|
424 | node_nt = c_nt_first(block->headers, &c_nt); |
---|
425 | while(node_nt != NULL) { |
---|
426 | long enc_offset; |
---|
427 | header = (ds_header_t) node_nt->ptr; |
---|
428 | |
---|
429 | for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++) |
---|
430 | { |
---|
431 | enc = header->concatenated_data + enc_offset; |
---|
432 | |
---|
433 | if (!strncmp(enc, "=?", 2)) { |
---|
434 | int was_null = 0; |
---|
435 | char *ptrptr, *decoded = NULL; |
---|
436 | long offset = (long) enc - (long) header->concatenated_data; |
---|
437 | |
---|
438 | if (header->original_data == NULL) { |
---|
439 | header->original_data = strdup(header->data); |
---|
440 | was_null = 1; |
---|
441 | } |
---|
442 | |
---|
443 | strtok_r (enc, "?", &ptrptr); |
---|
444 | strtok_r (NULL, "?", &ptrptr); |
---|
445 | ptr = strtok_r (NULL, "?", &ptrptr); |
---|
446 | dptr = strtok_r (NULL, "?", &ptrptr); |
---|
447 | if (!dptr) { |
---|
448 | if (was_null && header->original_data != NULL) |
---|
449 | free(header->original_data); |
---|
450 | if (was_null) |
---|
451 | header->original_data = NULL; |
---|
452 | continue; |
---|
453 | } |
---|
454 | |
---|
455 | rest = dptr + strlen (dptr); |
---|
456 | if (rest[0]!=0) { |
---|
457 | rest++; |
---|
458 | if (rest[0]!=0) rest++; |
---|
459 | } |
---|
460 | |
---|
461 | if (ptr != NULL && (ptr[0] == 'b' || ptr[0] == 'B')) |
---|
462 | decoded = _ds_decode_base64 (dptr); |
---|
463 | else if (ptr != NULL && (ptr[0] == 'q' || ptr[0] == 'Q')) |
---|
464 | decoded = _ds_decode_quoted (dptr); |
---|
465 | |
---|
466 | decoded_len = 0; |
---|
467 | |
---|
468 | /* Append the rest of the message */ |
---|
469 | |
---|
470 | if (decoded) |
---|
471 | { |
---|
472 | char *new_alloc; |
---|
473 | |
---|
474 | decoded_len = strlen(decoded); |
---|
475 | new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2); |
---|
476 | if (new_alloc == NULL) { |
---|
477 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
478 | } |
---|
479 | else |
---|
480 | { |
---|
481 | if (offset) |
---|
482 | strncpy(new_alloc, header->concatenated_data, offset); |
---|
483 | |
---|
484 | strcat(new_alloc, decoded); |
---|
485 | strcat(new_alloc, rest); |
---|
486 | free(decoded); |
---|
487 | decoded = new_alloc; |
---|
488 | } |
---|
489 | } |
---|
490 | |
---|
491 | if (decoded) { |
---|
492 | enc_offset += (decoded_len-1); |
---|
493 | free(header->concatenated_data); |
---|
494 | header->concatenated_data = decoded; |
---|
495 | } |
---|
496 | else if (was_null && header->original_data) { |
---|
497 | free(header->original_data); |
---|
498 | header->original_data = NULL; |
---|
499 | } |
---|
500 | else if (was_null) { |
---|
501 | header->original_data = NULL; |
---|
502 | } |
---|
503 | } |
---|
504 | } |
---|
505 | |
---|
506 | if (header->original_data != NULL) { |
---|
507 | free(header->data); |
---|
508 | header->data = strdup(header->concatenated_data); |
---|
509 | } |
---|
510 | |
---|
511 | node_nt = c_nt_next(block->headers, &c_nt); |
---|
512 | } |
---|
513 | |
---|
514 | return 0; |
---|
515 | } |
---|
516 | |
---|
517 | /* |
---|
518 | * _ds_analyze_header (ds_message_part_t block, ds_header_t header, |
---|
519 | * struct nt *boundaries) |
---|
520 | * |
---|
521 | * DESCRIPTION |
---|
522 | * analyzes the header passed in and performs various operations including: |
---|
523 | * - setting media type and subtype |
---|
524 | * - setting transfer encoding |
---|
525 | * - adding newly discovered boundaries |
---|
526 | * |
---|
527 | * based on the heading specified. essentially all headers should be |
---|
528 | * analyzed for future expansion |
---|
529 | * |
---|
530 | * INPUT ARGUMENTS |
---|
531 | * block the message block to which the header belongs |
---|
532 | * header the header to analyze |
---|
533 | * boundaries a list of known boundaries found within the block |
---|
534 | */ |
---|
535 | |
---|
536 | void |
---|
537 | _ds_analyze_header ( |
---|
538 | ds_message_part_t block, |
---|
539 | ds_header_t header, |
---|
540 | struct nt *boundaries) |
---|
541 | { |
---|
542 | if (!header || !block || !header->data) |
---|
543 | return; |
---|
544 | |
---|
545 | /* Content-Type header */ |
---|
546 | |
---|
547 | if (!strcasecmp (header->heading, "Content-Type")) |
---|
548 | { |
---|
549 | int len = strlen(header->data); |
---|
550 | if (!strncasecmp (header->data, "text", 4)) { |
---|
551 | block->media_type = MT_TEXT; |
---|
552 | if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5)) |
---|
553 | block->media_subtype = MST_PLAIN; |
---|
554 | else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4)) |
---|
555 | block->media_subtype = MST_HTML; |
---|
556 | else |
---|
557 | block->media_subtype = MST_OTHER; |
---|
558 | } |
---|
559 | |
---|
560 | else if (!strncasecmp (header->data, "application", 11)) |
---|
561 | { |
---|
562 | block->media_type = MT_APPLICATION; |
---|
563 | if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15)) |
---|
564 | block->media_subtype = MST_DSPAM_SIGNATURE; |
---|
565 | else |
---|
566 | block->media_subtype = MST_OTHER; |
---|
567 | } |
---|
568 | |
---|
569 | else if (!strncasecmp (header->data, "message", 7)) |
---|
570 | { |
---|
571 | block->media_type = MT_MESSAGE; |
---|
572 | if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6)) |
---|
573 | block->media_subtype = MST_RFC822; |
---|
574 | else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11)) |
---|
575 | block->media_subtype = MST_INOCULATION; |
---|
576 | else |
---|
577 | block->media_subtype = MST_OTHER; |
---|
578 | } |
---|
579 | |
---|
580 | else if (!strncasecmp (header->data, "multipart", 9)) |
---|
581 | { |
---|
582 | char boundary[128]; |
---|
583 | |
---|
584 | block->media_type = MT_MULTIPART; |
---|
585 | if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5)) |
---|
586 | block->media_subtype = MST_MIXED; |
---|
587 | else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11)) |
---|
588 | block->media_subtype = MST_ALTERNATIVE; |
---|
589 | else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6)) |
---|
590 | block->media_subtype = MST_SIGNED; |
---|
591 | else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9)) |
---|
592 | block->media_subtype = MST_ENCRYPTED; |
---|
593 | else |
---|
594 | block->media_subtype = MST_OTHER; |
---|
595 | |
---|
596 | if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) { |
---|
597 | if (!_ds_match_boundary (boundaries, boundary)) { |
---|
598 | _ds_push_boundary (boundaries, boundary); |
---|
599 | free(block->boundary); |
---|
600 | block->boundary = strdup (boundary); |
---|
601 | } |
---|
602 | } else { |
---|
603 | _ds_push_boundary (boundaries, ""); |
---|
604 | } |
---|
605 | } |
---|
606 | else { |
---|
607 | block->media_type = MT_OTHER; |
---|
608 | block->media_subtype = MST_OTHER; |
---|
609 | } |
---|
610 | |
---|
611 | } |
---|
612 | |
---|
613 | /* Content-Transfer-Encoding */ |
---|
614 | |
---|
615 | else if (!strcasecmp (header->heading, "Content-Transfer-Encoding")) |
---|
616 | { |
---|
617 | if (!strncasecmp (header->data, "7bit", 4)) |
---|
618 | block->encoding = EN_7BIT; |
---|
619 | else if (!strncasecmp (header->data, "8bit", 4)) |
---|
620 | block->encoding = EN_8BIT; |
---|
621 | else if (!strncasecmp (header->data, "quoted-printable", 16)) |
---|
622 | block->encoding = EN_QUOTED_PRINTABLE; |
---|
623 | else if (!strncasecmp (header->data, "base64", 6)) |
---|
624 | block->encoding = EN_BASE64; |
---|
625 | else if (!strncasecmp (header->data, "binary", 6)) |
---|
626 | block->encoding = EN_BINARY; |
---|
627 | else |
---|
628 | block->encoding = EN_OTHER; |
---|
629 | } |
---|
630 | |
---|
631 | if (!strcasecmp (header->heading, "Content-Disposition")) |
---|
632 | { |
---|
633 | if (!strncasecmp (header->data, "inline", 6)) |
---|
634 | block->content_disposition = PCD_INLINE; |
---|
635 | else if (!strncasecmp (header->data, "attachment", 10)) |
---|
636 | block->content_disposition = PCD_ATTACHMENT; |
---|
637 | else |
---|
638 | block->content_disposition = PCD_OTHER; |
---|
639 | } |
---|
640 | |
---|
641 | return; |
---|
642 | } |
---|
643 | |
---|
644 | /* |
---|
645 | * _ds_destroy_message (ds_message_t message) |
---|
646 | * |
---|
647 | * DESCRIPTION |
---|
648 | * destroys a message structure (ds_message_t) |
---|
649 | * |
---|
650 | * INPUT ARGUMENTS |
---|
651 | * message the message structure to be destroyed |
---|
652 | */ |
---|
653 | |
---|
654 | void |
---|
655 | _ds_destroy_message (ds_message_t message) |
---|
656 | { |
---|
657 | struct nt_node *node_nt; |
---|
658 | struct nt_c c; |
---|
659 | |
---|
660 | if (message == NULL) |
---|
661 | return; |
---|
662 | |
---|
663 | if (message->components) { |
---|
664 | node_nt = c_nt_first (message->components, &c); |
---|
665 | while (node_nt != NULL) |
---|
666 | { |
---|
667 | ds_message_part_t block = (ds_message_part_t) node_nt->ptr; |
---|
668 | _ds_destroy_block(block); |
---|
669 | node_nt = c_nt_next (message->components, &c); |
---|
670 | } |
---|
671 | nt_destroy (message->components); |
---|
672 | } |
---|
673 | free (message); |
---|
674 | return; |
---|
675 | } |
---|
676 | |
---|
677 | /* |
---|
678 | * _ds_destroy_headers (ds_message_part_t block) |
---|
679 | * |
---|
680 | * DESCRIPTION |
---|
681 | * destroys a message block's header pairs |
---|
682 | * does not free the structures themselves; these are freed at nt_destroy |
---|
683 | * |
---|
684 | * INPUT ARGUMENTS |
---|
685 | * block the message block containing the headers to destsroy |
---|
686 | */ |
---|
687 | |
---|
688 | void |
---|
689 | _ds_destroy_headers (ds_message_part_t block) |
---|
690 | { |
---|
691 | struct nt_node *node_nt; |
---|
692 | struct nt_c c; |
---|
693 | |
---|
694 | if (!block || !block->headers) |
---|
695 | return; |
---|
696 | |
---|
697 | node_nt = c_nt_first (block->headers, &c); |
---|
698 | while (node_nt != NULL) |
---|
699 | { |
---|
700 | ds_header_t field = (ds_header_t) node_nt->ptr; |
---|
701 | |
---|
702 | if (field) |
---|
703 | { |
---|
704 | free (field->original_data); |
---|
705 | free (field->heading); |
---|
706 | free (field->concatenated_data); |
---|
707 | free (field->data); |
---|
708 | } |
---|
709 | node_nt = c_nt_next (block->headers, &c); |
---|
710 | } |
---|
711 | |
---|
712 | return; |
---|
713 | } |
---|
714 | |
---|
715 | /* |
---|
716 | * _ds_destroy_block (ds_message_part_t block) |
---|
717 | * |
---|
718 | * DESCRIPTION |
---|
719 | * destroys a message block |
---|
720 | * |
---|
721 | * INPUT ARGUMENTS |
---|
722 | * block the message block to destroy |
---|
723 | */ |
---|
724 | |
---|
725 | void |
---|
726 | _ds_destroy_block (ds_message_part_t block) |
---|
727 | { |
---|
728 | if (!block) |
---|
729 | return; |
---|
730 | |
---|
731 | if (block->headers) |
---|
732 | { |
---|
733 | _ds_destroy_headers (block); |
---|
734 | nt_destroy (block->headers); |
---|
735 | } |
---|
736 | buffer_destroy (block->body); |
---|
737 | buffer_destroy (block->original_signed_body); |
---|
738 | free (block->boundary); |
---|
739 | free (block->terminating_boundary); |
---|
740 | // free (block); |
---|
741 | return; |
---|
742 | } |
---|
743 | |
---|
744 | /* |
---|
745 | * _ds_decode_block (ds_message_part_t block) |
---|
746 | * |
---|
747 | * DESCRIPTION |
---|
748 | * decodes a message block |
---|
749 | * |
---|
750 | * INPUT ARGUMENTS |
---|
751 | * block the message block to decode |
---|
752 | * |
---|
753 | * RETURN VALUES |
---|
754 | * a pointer to the allocated character array containing the decoded message |
---|
755 | * NULL on failure |
---|
756 | */ |
---|
757 | |
---|
758 | char * |
---|
759 | _ds_decode_block (ds_message_part_t block) |
---|
760 | { |
---|
761 | if (block->encoding == EN_BASE64) |
---|
762 | return _ds_decode_base64 (block->body->data); |
---|
763 | else if (block->encoding == EN_QUOTED_PRINTABLE) |
---|
764 | return _ds_decode_quoted (block->body->data); |
---|
765 | |
---|
766 | LOG (LOG_WARNING, "decoding of block encoding type %d not supported", |
---|
767 | block->encoding); |
---|
768 | return NULL; |
---|
769 | } |
---|
770 | |
---|
771 | /* |
---|
772 | * _ds_decode_{base64,quoted,hex8bit} |
---|
773 | * |
---|
774 | * DESCRIPTION |
---|
775 | * supporting block decoder functions |
---|
776 | * these function call (or perform) specific decoding functions |
---|
777 | * |
---|
778 | * INPUT ARGUMENTS |
---|
779 | * body encoded message body |
---|
780 | * |
---|
781 | * RETURN VALUES |
---|
782 | * a pointer to the allocated character array containing the decoded body |
---|
783 | */ |
---|
784 | |
---|
/* Decodes base64 text via base64decode(); a NULL body yields NULL. */
char *
_ds_decode_base64 (const char *body)
{
  return (body != NULL) ? base64decode (body) : NULL;
}
---|
793 | |
---|
794 | char * |
---|
795 | _ds_decode_quoted (const char *body) |
---|
796 | { |
---|
797 | #ifdef VERBOSE |
---|
798 | LOGDEBUG("decoding Quoted Printable encoded buffer"); |
---|
799 | #endif |
---|
800 | if (!body) |
---|
801 | return NULL; |
---|
802 | |
---|
803 | char *n, *out; |
---|
804 | const char *end, *p; |
---|
805 | |
---|
806 | n = out = malloc(strlen(body)+1); |
---|
807 | end = body + strlen(body); |
---|
808 | |
---|
809 | if (out == NULL) { |
---|
810 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
811 | return NULL; |
---|
812 | } |
---|
813 | |
---|
814 | for (p = body; p < end; p++, n++) { |
---|
815 | if (*p == '=') { |
---|
816 | if (p[1] == '\r' && p[2] == '\n') { |
---|
817 | n -= 1; |
---|
818 | p += 2; |
---|
819 | } else if (p[1] == '\n') { |
---|
820 | n -= 1; |
---|
821 | p += 1; |
---|
822 | } else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) { |
---|
823 | *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2])); |
---|
824 | p += 2; |
---|
825 | } else |
---|
826 | *n = *p; |
---|
827 | } else |
---|
828 | *n = *p; |
---|
829 | } |
---|
830 | |
---|
831 | *n = '\0'; |
---|
832 | return (char *)out; |
---|
833 | } |
---|
834 | |
---|
835 | char * |
---|
836 | _ds_decode_hex8bit (const char *body) |
---|
837 | { |
---|
838 | #ifdef VERBOSE |
---|
839 | LOGDEBUG("decoding hexadecimal 8-bit encodings in message block"); |
---|
840 | #endif |
---|
841 | if (!body) |
---|
842 | return NULL; |
---|
843 | |
---|
844 | char *n, *out; |
---|
845 | const char *end, *p; |
---|
846 | |
---|
847 | n = out = malloc(strlen(body)+1); |
---|
848 | end = body + strlen(body); |
---|
849 | |
---|
850 | if (out == NULL) { |
---|
851 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
852 | return NULL; |
---|
853 | } |
---|
854 | |
---|
855 | for (p = body; p < end; p++, n++) { |
---|
856 | if (*p == '%') |
---|
857 | if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) { |
---|
858 | *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2])); |
---|
859 | p += 2; |
---|
860 | } else |
---|
861 | *n = *p; |
---|
862 | else |
---|
863 | *n = *p; |
---|
864 | } |
---|
865 | |
---|
866 | *n = '\0'; |
---|
867 | return (char *)out; |
---|
868 | } |
---|
869 | |
---|
870 | /* |
---|
871 | * _ds_encode_block (ds_message_part_t block, int encoding) |
---|
872 | * |
---|
873 | * DESCRIPTION |
---|
874 | * encodes a message block using the encoding specified and replaces the |
---|
875 | * block's message body with the encoded data |
---|
876 | * |
---|
877 | * INPUT ARGUMENTS |
---|
878 | * block the message block to encode |
---|
879 | * encoding encoding to use (EN_) |
---|
880 | * |
---|
881 | * RETURN VALUES |
---|
882 | * returns 0 on success |
---|
883 | */ |
---|
884 | |
---|
885 | int |
---|
886 | _ds_encode_block (ds_message_part_t block, int encoding) |
---|
887 | { |
---|
888 | /* we can't encode a block with the same encoding */ |
---|
889 | |
---|
890 | if (block->encoding == encoding) |
---|
891 | return EINVAL; |
---|
892 | |
---|
893 | /* we can't encode a block that's already encoded */ |
---|
894 | |
---|
895 | if (block->encoding == EN_BASE64 || block->encoding == EN_QUOTED_PRINTABLE) |
---|
896 | return EFAILURE; |
---|
897 | |
---|
898 | if (encoding == EN_BASE64) { |
---|
899 | char *encoded = _ds_encode_base64 (block->body->data); |
---|
900 | buffer_destroy (block->body); |
---|
901 | block->body = buffer_create (encoded); |
---|
902 | free (encoded); |
---|
903 | block->encoding = EN_BASE64; |
---|
904 | } |
---|
905 | else if (encoding == EN_QUOTED_PRINTABLE) { |
---|
906 | |
---|
907 | /* TODO */ |
---|
908 | |
---|
909 | return 0; |
---|
910 | } |
---|
911 | |
---|
912 | LOGDEBUG("unsupported encoding: %d", encoding); |
---|
913 | return 0; |
---|
914 | } |
---|
915 | |
---|
916 | /* |
---|
917 | * _ds_encode_{base64,quoted} |
---|
918 | * |
---|
919 | * DESCRIPTION |
---|
920 | * supporting block encoder functions |
---|
921 | * these function call (or perform) specific encoding functions |
---|
922 | * |
---|
923 | * INPUT ARGUMENTS |
---|
924 | * body decoded message body |
---|
925 | * |
---|
926 | * RETURN VALUES |
---|
927 | * a pointer to the allocated character array containing the encoded body |
---|
928 | */ |
---|
929 | |
---|
/*
 * Encodes text as base64 via base64encode().  A NULL body yields NULL,
 * mirroring _ds_decode_base64().
 */
char *
_ds_encode_base64 (const char *body)
{
  if (body == NULL)
    return NULL;

  return base64encode (body);
}
---|
935 | |
---|
936 | /* |
---|
937 | * _ds_assemble_message (ds_message_t message, const char *newline) |
---|
938 | * |
---|
939 | * DESCRIPTION |
---|
940 | * assembles a message structure into a flat text message |
---|
941 | * |
---|
942 | * INPUT ARGUMENTS |
---|
943 | * message the message structure (ds_message_t) to assemble |
---|
944 | * |
---|
945 | * RETURN VALUES |
---|
946 | * a pointer to the allocated character array containing the text message |
---|
947 | */ |
---|
948 | |
---|
949 | char * |
---|
950 | _ds_assemble_message (ds_message_t message, const char *newline) |
---|
951 | { |
---|
952 | buffer *out = buffer_create (NULL); |
---|
953 | struct nt_node *node_nt, *node_header; |
---|
954 | struct nt_c c_nt, c_nt2; |
---|
955 | char *heading; |
---|
956 | char *copyback; |
---|
957 | #ifdef VERBOSE |
---|
958 | int i = 0; |
---|
959 | #endif |
---|
960 | |
---|
961 | if (!out) { |
---|
962 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
963 | return NULL; |
---|
964 | } |
---|
965 | |
---|
966 | node_nt = c_nt_first (message->components, &c_nt); |
---|
967 | while (node_nt != NULL && node_nt->ptr != NULL) |
---|
968 | { |
---|
969 | ds_message_part_t block = |
---|
970 | (ds_message_part_t) node_nt->ptr; |
---|
971 | #ifdef VERBOSE |
---|
972 | LOGDEBUG ("assembling component %d", i); |
---|
973 | #endif |
---|
974 | |
---|
975 | /* Assemble headers */ |
---|
976 | |
---|
977 | if (block->headers != NULL && block->headers->items > 0) |
---|
978 | { |
---|
979 | node_header = c_nt_first (block->headers, &c_nt2); |
---|
980 | while (node_header != NULL) |
---|
981 | { |
---|
982 | char *data; |
---|
983 | ds_header_t current_header = |
---|
984 | (ds_header_t) node_header->ptr; |
---|
985 | |
---|
986 | data = (current_header->original_data == NULL) ? current_header->data : |
---|
987 | current_header->original_data; |
---|
988 | |
---|
989 | heading = malloc( |
---|
990 | ((current_header->heading) ? strlen(current_header->heading) : 0) |
---|
991 | + ((data) ? strlen(data) : 0) |
---|
992 | + 3 + strlen(newline)); |
---|
993 | |
---|
994 | if (current_header->heading != NULL && |
---|
995 | (!strncmp (current_header->heading, "From ", 5) || |
---|
996 | !strncmp (current_header->heading, "--", 2))) |
---|
997 | sprintf (heading, "%s:%s%s", |
---|
998 | (current_header->heading) ? current_header->heading : "", |
---|
999 | (data) ? data : "", newline); |
---|
1000 | else |
---|
1001 | sprintf (heading, "%s: %s%s", |
---|
1002 | (current_header->heading) ? current_header->heading : "", |
---|
1003 | (data) ? data : "", newline); |
---|
1004 | |
---|
1005 | buffer_cat (out, heading); |
---|
1006 | free(heading); |
---|
1007 | node_header = c_nt_next (block->headers, &c_nt2); |
---|
1008 | } |
---|
1009 | } |
---|
1010 | |
---|
1011 | buffer_cat (out, newline); |
---|
1012 | |
---|
1013 | /* Assemble bodies */ |
---|
1014 | |
---|
1015 | if (block->original_signed_body != NULL && message->protect) |
---|
1016 | buffer_cat (out, block->original_signed_body->data); |
---|
1017 | else |
---|
1018 | buffer_cat (out, block->body->data); |
---|
1019 | |
---|
1020 | if (block->terminating_boundary != NULL) |
---|
1021 | { |
---|
1022 | buffer_cat (out, "--"); |
---|
1023 | buffer_cat (out, block->terminating_boundary); |
---|
1024 | } |
---|
1025 | |
---|
1026 | node_nt = c_nt_next (message->components, &c_nt); |
---|
1027 | #ifdef VERBOSE |
---|
1028 | i++; |
---|
1029 | #endif |
---|
1030 | |
---|
1031 | if (node_nt != NULL && node_nt->ptr != NULL) |
---|
1032 | buffer_cat (out, newline); |
---|
1033 | } |
---|
1034 | |
---|
1035 | copyback = out->data; |
---|
1036 | out->data = NULL; |
---|
1037 | buffer_destroy (out); |
---|
1038 | return copyback; |
---|
1039 | } |
---|
1040 | |
---|
1041 | /* |
---|
1042 | * _ds_{push,pop,match,extract}_boundary |
---|
1043 | * |
---|
1044 | * DESCRIPTION |
---|
1045 | * these functions maintain and service a boundary "stack" on the message |
---|
1046 | */ |
---|
1047 | |
---|
1048 | int |
---|
1049 | _ds_push_boundary (struct nt *stack, const char *boundary) |
---|
1050 | { |
---|
1051 | char *y; |
---|
1052 | |
---|
1053 | if (boundary == NULL || boundary[0] == 0) |
---|
1054 | return EINVAL; |
---|
1055 | |
---|
1056 | y = malloc (strlen (boundary) + 3); |
---|
1057 | if (y == NULL) |
---|
1058 | return EUNKNOWN; |
---|
1059 | |
---|
1060 | sprintf (y, "--%s", boundary); |
---|
1061 | nt_add (stack, (char *) y); |
---|
1062 | free(y); |
---|
1063 | |
---|
1064 | return 0; |
---|
1065 | } |
---|
1066 | |
---|
1067 | char * |
---|
1068 | _ds_pop_boundary (struct nt *stack) |
---|
1069 | { |
---|
1070 | struct nt_node *node, *last_node = NULL, *parent_node = NULL; |
---|
1071 | struct nt_c c; |
---|
1072 | char *boundary = NULL; |
---|
1073 | |
---|
1074 | node = c_nt_first (stack, &c); |
---|
1075 | while (node != NULL) |
---|
1076 | { |
---|
1077 | parent_node = last_node; |
---|
1078 | last_node = node; |
---|
1079 | node = c_nt_next (stack, &c); |
---|
1080 | } |
---|
1081 | if (parent_node != NULL) |
---|
1082 | parent_node->next = NULL; |
---|
1083 | else |
---|
1084 | stack->first = NULL; |
---|
1085 | |
---|
1086 | if (last_node == NULL) |
---|
1087 | return NULL; |
---|
1088 | |
---|
1089 | boundary = strdup (last_node->ptr); |
---|
1090 | |
---|
1091 | free (last_node->ptr); |
---|
1092 | free (last_node); |
---|
1093 | |
---|
1094 | return boundary; |
---|
1095 | } |
---|
1096 | |
---|
1097 | int |
---|
1098 | _ds_match_boundary (struct nt *stack, const char *buff) |
---|
1099 | { |
---|
1100 | struct nt_node *node; |
---|
1101 | struct nt_c c; |
---|
1102 | |
---|
1103 | node = c_nt_first (stack, &c); |
---|
1104 | while (node != NULL) |
---|
1105 | { |
---|
1106 | if (!strncmp (buff, node->ptr, strlen (node->ptr))) |
---|
1107 | { |
---|
1108 | return 1; |
---|
1109 | } |
---|
1110 | node = c_nt_next (stack, &c); |
---|
1111 | } |
---|
1112 | return 0; |
---|
1113 | } |
---|
1114 | |
---|
1115 | int |
---|
1116 | _ds_extract_boundary (char *buf, size_t size, char *mem) |
---|
1117 | { |
---|
1118 | char *data, *ptr, *ptrptr; |
---|
1119 | |
---|
1120 | if (mem == NULL) |
---|
1121 | return EINVAL; |
---|
1122 | |
---|
1123 | data = strdup(mem); |
---|
1124 | if (data == NULL) { |
---|
1125 | LOG(LOG_CRIT, ERR_MEM_ALLOC); |
---|
1126 | return EUNKNOWN; |
---|
1127 | } |
---|
1128 | |
---|
1129 | for(ptr=data;ptr<(data+strlen(data));ptr++) { |
---|
1130 | if (!strncasecmp(ptr, "boundary", 8)) { |
---|
1131 | ptr = strchr(ptr, '='); |
---|
1132 | if (ptr == NULL) { |
---|
1133 | free(data); |
---|
1134 | return EFAILURE; |
---|
1135 | } |
---|
1136 | ptr++; |
---|
1137 | while(isspace((int) ptr[0])) |
---|
1138 | ptr++; |
---|
1139 | if (ptr[0] == '"') |
---|
1140 | ptr++; |
---|
1141 | strtok_r(ptr, " \";\n\t", &ptrptr); |
---|
1142 | strlcpy(buf, ptr, size); |
---|
1143 | free(data); |
---|
1144 | return 0; |
---|
1145 | } |
---|
1146 | } |
---|
1147 | |
---|
1148 | free(data); |
---|
1149 | return EFAILURE; |
---|
1150 | } |
---|
1151 | |
---|
1152 | /* |
---|
1153 | * _ds_find_header (ds_message_t message, consr char *heading) { |
---|
1154 | * |
---|
1155 | * DESCRIPTION |
---|
1156 | * finds a header and returns its value |
---|
1157 | * |
---|
1158 | * INPUT ARGUMENTS |
---|
1159 | * message the message structure to search |
---|
1160 | * heading the heading to search for |
---|
1161 | * flags optional search flags |
---|
1162 | * |
---|
1163 | * RETURN VALUES |
---|
1164 | * a pointer to the header structure's value |
---|
1165 | * |
---|
1166 | */ |
---|
1167 | |
---|
1168 | char * |
---|
1169 | _ds_find_header (ds_message_t message, const char *heading) { |
---|
1170 | ds_message_part_t block; |
---|
1171 | ds_header_t head; |
---|
1172 | struct nt_node *node_nt; |
---|
1173 | |
---|
1174 | if (message->components->first) { |
---|
1175 | if ((block = message->components->first->ptr)==NULL) |
---|
1176 | return NULL; |
---|
1177 | if (block->headers == NULL) |
---|
1178 | return NULL; |
---|
1179 | } else { |
---|
1180 | return NULL; |
---|
1181 | } |
---|
1182 | |
---|
1183 | node_nt = block->headers->first; |
---|
1184 | while(node_nt != NULL) { |
---|
1185 | head = (ds_header_t) node_nt->ptr; |
---|
1186 | if (head && !strcasecmp(head->heading, heading)) { |
---|
1187 | return head->data; |
---|
1188 | } |
---|
1189 | node_nt = node_nt->next; |
---|
1190 | } |
---|
1191 | |
---|
1192 | return NULL; |
---|
1193 | } |
---|
1194 | |
---|
/*
 * Converts one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15; any other
 * character yields -1.
 */
int _ds_hex2dec(unsigned char hex) {
  if (hex >= '0' && hex <= '9')
    return hex - '0';

  if (hex >= 'a' && hex <= 'f')
    return hex - 'a' + 10;

  if (hex >= 'A' && hex <= 'F')
    return hex - 'A' + 10;

  return -1;
}
---|
1216 | |
---|
1217 | /* |
---|
1218 | * _ds_strip_html(const char *html) |
---|
1219 | * |
---|
1220 | * DESCRIPTION |
---|
1221 | * strip html tags from the supplied message |
---|
1222 | * |
---|
1223 | * INPUT ARGUMENTS |
---|
1224 | * html encoded message body |
---|
1225 | * |
---|
1226 | * RETURN VALUES |
---|
1227 | * a pointer to the allocated character array containing the |
---|
1228 | * stripped message |
---|
1229 | * |
---|
1230 | */ |
---|
1231 | |
---|
1232 | char * |
---|
1233 | _ds_strip_html (const char *html) |
---|
1234 | { |
---|
1235 | #ifdef VERBOSE |
---|
1236 | LOGDEBUG("stripping HTML tags from message block"); |
---|
1237 | #endif |
---|
1238 | size_t j = 0, k = 0, i = 0; |
---|
1239 | int visible = 1; |
---|
1240 | int closing_td_tag = 0; |
---|
1241 | char *html2; |
---|
1242 | const char *cdata_close_tag = NULL; |
---|
1243 | |
---|
1244 | if(!html) |
---|
1245 | return NULL; |
---|
1246 | |
---|
1247 | static struct { |
---|
1248 | unsigned int id; |
---|
1249 | char *entity; |
---|
1250 | } |
---|
1251 | charset[] = { |
---|
1252 | { 32, " " }, { 34, """ }, { 34, """ }, { 38, "&" }, |
---|
1253 | { 38, "&" }, { 39, "'" }, { 60, "<" }, { 60, "<" }, |
---|
1254 | { 62, ">" }, { 62, ">" }, { 160, " " }, { 161, "¡" }, |
---|
1255 | { 162, "¢" }, { 163, "£" }, { 164, "¤" }, { 165, "¥" }, |
---|
1256 | { 166, "¦" }, { 167, "§" }, { 168, "¨" }, { 169, "©" }, |
---|
1257 | { 170, "ª" }, { 171, "«" }, { 172, "¬" }, { 173, "­" }, |
---|
1258 | { 174, "®" }, { 175, "¯" }, { 176, "°" }, { 177, "±" }, |
---|
1259 | { 178, "²" }, { 179, "³" }, { 180, "´" }, { 181, "µ" }, |
---|
1260 | { 182, "¶" }, { 183, "·" }, { 184, "¸" }, { 185, "¹" }, |
---|
1261 | { 186, "º" }, { 187, "»" }, { 188, "¼" }, { 189, "½" }, |
---|
1262 | { 190, "¾" }, { 191, "¿" }, { 192, "À" }, { 193, "Á" }, |
---|
1263 | { 194, "Â" }, { 195, "Ã" }, { 196, "Ä" }, { 197, "Å" }, |
---|
1264 | { 198, "Æ" }, { 199, "Ç" }, { 200, "È" }, { 201, "É" }, |
---|
1265 | { 202, "Ê" }, { 203, "Ë" }, { 204, "Ì" }, { 205, "Í" }, |
---|
1266 | { 206, "Î" }, { 207, "Ï" }, { 208, "Ð" }, { 209, "Ñ" }, |
---|
1267 | { 210, "Ò" }, { 211, "Ó" }, { 212, "Ô" }, { 213, "Õ" }, |
---|
1268 | { 214, "Ö" }, { 215, "×" }, { 216, "Ø" }, { 217, "Ù" }, |
---|
1269 | { 218, "Ú" }, { 219, "Û" }, { 220, "Ü" }, { 221, "Ý" }, |
---|
1270 | { 222, "Þ" }, { 223, "ß" }, { 224, "à" }, { 225, "á" }, |
---|
1271 | { 226, "â" }, { 227, "ã" }, { 228, "ä" }, { 229, "å" }, |
---|
1272 | { 230, "æ" }, { 231, "ç" }, { 232, "è" }, { 233, "é" }, |
---|
1273 | { 234, "ê" }, { 235, "ë" }, { 236, "ì" }, { 237, "í" }, |
---|
1274 | { 238, "î" }, { 239, "ï" }, { 240, "ð" }, { 241, "ñ" }, |
---|
1275 | { 242, "ò" }, { 243, "ó" }, { 244, "ô" }, { 245, "õ" }, |
---|
1276 | { 246, "ö" }, { 247, "÷" }, { 248, "ø" }, { 249, "ù" }, |
---|
1277 | { 250, "ú" }, { 251, "û" }, { 252, "ü" }, { 253, "ý" }, |
---|
1278 | { 254, "þ" }, { 255, "ÿ" }, { 338, "Œ" }, { 339, "œ" }, |
---|
1279 | { 352, "Š" }, { 353, "š" }, { 376, "Ÿ" }, { 402, "ƒ" }, |
---|
1280 | { 710, "ˆ" }, { 732, "˜" }, { 913, "Α" }, { 914, "Β" }, |
---|
1281 | { 915, "Γ" }, { 916, "Δ" }, { 917, "Ε" }, { 918, "Ζ" }, |
---|
1282 | { 919, "Η" }, { 920, "Θ" }, { 921, "Ι" }, { 922, "Κ" }, |
---|
1283 | { 923, "Λ" }, { 924, "Μ" }, { 925, "Ν" }, { 926, "Ξ" }, |
---|
1284 | { 927, "Ο" }, { 928, "Π" }, { 929, "Ρ" }, { 931, "Σ" }, |
---|
1285 | { 932, "Τ" }, { 933, "Υ" }, { 934, "Φ" }, { 935, "Χ" }, |
---|
1286 | { 936, "Ψ" }, { 937, "Ω" }, { 945, "α" }, { 946, "β" }, |
---|
1287 | { 947, "γ" }, { 948, "δ" }, { 949, "ε" }, { 950, "ζ" }, |
---|
1288 | { 951, "η" }, { 952, "θ" }, { 953, "ι" }, { 954, "κ" }, |
---|
1289 | { 955, "λ" }, { 956, "μ" }, { 957, "ν" }, { 958, "ξ" }, |
---|
1290 | { 959, "ο" }, { 960, "π" }, { 961, "ρ" }, { 962, "ς" }, |
---|
1291 | { 963, "σ" }, { 964, "τ" }, { 965, "υ" }, { 966, "φ" }, |
---|
1292 | { 967, "χ" }, { 968, "ψ" }, { 969, "ω" }, { 977, "&thetasym" }, |
---|
1293 | { 978, "ϒ" }, { 982, "ϖ" }, {8194, " " }, {8195, " " }, |
---|
1294 | { 8201, " " }, {8204, "‌" }, {8205, "‍" }, {8206, "‎" }, |
---|
1295 | { 8207, "‏" }, {8211, "–" }, {8212, "—" }, {8216, "‘" }, |
---|
1296 | { 8217, "’" }, {8218, "‚" }, {8220, "“" }, {8221, "”" }, |
---|
1297 | { 8222, "„" }, {8224, "†" }, {8225, "‡" }, {8226, "•" }, |
---|
1298 | { 8230, "…" }, {8240, "‰" }, {8242, "′" }, {8243, "″" }, |
---|
1299 | { 8249, "‹" }, {8250, "›" }, {8254, "‾" }, {8260, "⁄" }, |
---|
1300 | { 8364, "€" }, {8465, "ℑ" }, {8472, "℘" }, {8476, "ℜ" }, |
---|
1301 | { 8482, "™" }, {8501, "ℵ" }, {8592, "←" }, {8593, "↑" }, |
---|
1302 | { 8594, "→" }, {8595, "↓" }, {8596, "↔" }, {8629, "↵" }, |
---|
1303 | { 8656, "⇐" }, {8657, "⇑" }, {8658, "⇒" }, {8659, "⇓" }, |
---|
1304 | { 8660, "⇔" }, {8704, "∀" }, {8706, "∂" }, {8707, "∃" }, |
---|
1305 | { 8709, "∅" }, {8711, "∇" }, {8712, "∈" }, {8713, "∉" }, |
---|
1306 | { 8715, "∋" }, {8719, "∏" }, {8721, "∑" }, {8722, "−" }, |
---|
1307 | { 8727, "∗" }, {8730, "√" }, {8733, "∝" }, {8734, "∞" }, |
---|
1308 | { 8736, "∠" }, {8743, "∧" }, {8744, "∨" }, {8745, "∩" }, |
---|
1309 | { 8746, "∪" }, {8747, "∫" }, {8756, "∴" }, {8764, "∼" }, |
---|
1310 | { 8773, "≅" }, {8776, "≈" }, {8800, "≠" }, {8801, "≡" }, |
---|
1311 | { 8804, "≤" }, {8805, "≥" }, {8834, "⊂" }, {8835, "⊃" }, |
---|
1312 | { 8836, "⊄" }, {8838, "⊆" }, {8839, "⊇" }, {8853, "⊕" }, |
---|
1313 | { 8855, "⊗" }, {8869, "⊥" }, {8901, "⋅" }, {8968, "⌈" }, |
---|
1314 | { 8969, "⌉" }, {8970, "⌊" }, {8971, "⌋" }, {9001, "⟨" }, |
---|
1315 | { 9002, "⟩" }, {9674, "◊" }, {9824, "♠" }, {9827, "♣" }, |
---|
1316 | { 9829, "♥" }, {9830, "♦" } |
---|
1317 | }; |
---|
1318 | int num_chars = sizeof(charset) / sizeof(charset[0]); |
---|
1319 | |
---|
1320 | static struct { |
---|
1321 | char *open_tag; |
---|
1322 | char *uri_tag; |
---|
1323 | } |
---|
1324 | uritag[] = { |
---|
1325 | { "<a", "href" }, { "<img", "src" }, { "<input", "src" }, |
---|
1326 | { "<iframe", "src" }, { "<frame", "src" }, { "<script", "src" }, |
---|
1327 | { "<form", "action" }, { "<embed", "src" }, { "<area", "href" }, |
---|
1328 | { "<base", "href" }, { "<link", "href" }, { "<source", "src" }, |
---|
1329 | { "<body", "background" }, { "<blockquote", "cite" }, { "<q", "cite" }, |
---|
1330 | { "<ins", "cite" }, { "<del", "cite" } |
---|
1331 | }; |
---|
1332 | int num_uri = sizeof(uritag) / sizeof(uritag[0]); |
---|
1333 | |
---|
1334 | size_t len = strlen(html); |
---|
1335 | html2 = malloc(len+1); |
---|
1336 | |
---|
1337 | if (html2 == NULL) { |
---|
1338 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
1339 | return NULL; |
---|
1340 | } |
---|
1341 | |
---|
1342 | for (i = 0; i < len; i++) { |
---|
1343 | if (html[i] == '<') { |
---|
1344 | if (cdata_close_tag) { |
---|
1345 | if (strncasecmp(html + i, cdata_close_tag, strlen(cdata_close_tag)) == 0) { |
---|
1346 | i += strlen(cdata_close_tag) - 1; |
---|
1347 | cdata_close_tag = NULL; |
---|
1348 | } |
---|
1349 | continue; |
---|
1350 | } else if (strncasecmp(html + i, "</td>", 5) == 0) { |
---|
1351 | i += 4; |
---|
1352 | closing_td_tag = 1; |
---|
1353 | continue; |
---|
1354 | } else if (strncasecmp(html + i, "<td", 3) == 0 && closing_td_tag) { |
---|
1355 | if (j > 0 && !isspace(html2[j-1])) { |
---|
1356 | html2[j++]=' '; |
---|
1357 | } |
---|
1358 | visible = 0; |
---|
1359 | } else { |
---|
1360 | closing_td_tag = 0; |
---|
1361 | visible = 1; |
---|
1362 | } |
---|
1363 | k = i + 1; |
---|
1364 | |
---|
1365 | if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) || |
---|
1366 | (html[k] >= 97 && html[k] <= 122) || |
---|
1367 | (html[k] == 47) || |
---|
1368 | (html[k] == 33) ))) { |
---|
1369 | /* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */ |
---|
1370 | visible = 1; |
---|
1371 | html2[j++]=html[i]; |
---|
1372 | i = k; |
---|
1373 | const char *w = &(html[k]); |
---|
1374 | while (j < len && (size_t)(w - html) < len && *w != '<') { |
---|
1375 | html2[j++]=*w; |
---|
1376 | w++; |
---|
1377 | i++; |
---|
1378 | } |
---|
1379 | continue; |
---|
1380 | } else if (html[k]) { |
---|
1381 | /* find the end of the tag */ |
---|
1382 | while (k < len && html[k] != '<' && html[k] != '>') {k++;} |
---|
1383 | |
---|
1384 | /* if we've got a tag with a uri, save the address to print later. */ |
---|
1385 | char *url_tag = " "; |
---|
1386 | int tag_offset = 0, x = 0, y = 0; |
---|
1387 | for (y = 0; y < num_uri; y++) { |
---|
1388 | x = strlen(uritag[y].open_tag); |
---|
1389 | if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) { |
---|
1390 | url_tag = uritag[y].uri_tag; |
---|
1391 | tag_offset = i + x + 1; |
---|
1392 | break; |
---|
1393 | } |
---|
1394 | } |
---|
1395 | /* tag with uri found */ |
---|
1396 | if (tag_offset > 0) { |
---|
1397 | size_t url_start; /* start of url tag inclusive [ */ |
---|
1398 | size_t url_tag_len = strlen(url_tag); |
---|
1399 | char delim = ' '; |
---|
1400 | /* find start of uri */ |
---|
1401 | for (url_start = tag_offset; url_start <= k; url_start++) { |
---|
1402 | if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) { |
---|
1403 | url_start += url_tag_len; |
---|
1404 | while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces before = */ |
---|
1405 | if (html[url_start] == '=') { |
---|
1406 | url_start++; |
---|
1407 | while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */ |
---|
1408 | if (html[url_start] == '"') { |
---|
1409 | delim = '"'; |
---|
1410 | url_start++; |
---|
1411 | } else if (html[url_start] == '\'') { |
---|
1412 | delim = '\''; |
---|
1413 | url_start++; |
---|
1414 | } else { |
---|
1415 | delim = '>'; |
---|
1416 | } |
---|
1417 | break; |
---|
1418 | } else { |
---|
1419 | /* Start of uri tag found but no '=' after the tag. |
---|
1420 | * Skip the whole tag. |
---|
1421 | */ |
---|
1422 | break; |
---|
1423 | } |
---|
1424 | } else if ((url_start - tag_offset) >= 50) { |
---|
1425 | /* The length of the html tag is over 50 characters long without |
---|
1426 | * finding the start of the url/uri. Skip the whole tag. |
---|
1427 | */ |
---|
1428 | break; |
---|
1429 | } |
---|
1430 | } |
---|
1431 | /* find end of uri */ |
---|
1432 | if (delim != ' ') { |
---|
1433 | if (url_start < len && |
---|
1434 | (strncasecmp(html + url_start, "http:", 5) == 0 || |
---|
1435 | strncasecmp(html + url_start, "https:", 6) == 0 || |
---|
1436 | strncasecmp(html + url_start, "ftp:", 4) == 0)) { |
---|
1437 | html2[j++]=' '; |
---|
1438 | const char *w = &(html[url_start]); |
---|
1439 | /* html2 is a buffer of len + 1, where the +1 is for NULL |
---|
1440 | * termination. This means we only want to loop to len |
---|
1441 | * since we will replace html2[j] right after the loop. |
---|
1442 | */ |
---|
1443 | while (j < len && (size_t)(w - html) < len && *w != delim) { |
---|
1444 | html2[j++]=*w; |
---|
1445 | w++; |
---|
1446 | } |
---|
1447 | html2[j++]=' '; |
---|
1448 | } |
---|
1449 | } |
---|
1450 | } else if (strncasecmp(html + i, "<p>", 3) == 0 |
---|
1451 | || strncasecmp(html + i, "<p ", 3) == 0 |
---|
1452 | || strncasecmp(html + i, "<p\t", 3) == 0 |
---|
1453 | || strncasecmp(html + i, "<tr", 3) == 0 |
---|
1454 | || strncasecmp(html + i, "<option", 7) == 0 |
---|
1455 | || strncasecmp(html + i, "<br", 3) == 0 |
---|
1456 | || strncasecmp(html + i, "<li", 3) == 0 |
---|
1457 | || strncasecmp(html + i, "<div", 4) == 0 |
---|
1458 | || strncasecmp(html + i, "</select>", 9) == 0 |
---|
1459 | || strncasecmp(html + i, "</table>", 8) == 0) { |
---|
1460 | if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') { |
---|
1461 | html2[j++] = '\n'; |
---|
1462 | } |
---|
1463 | } else if (strncasecmp(html + i, "<applet", 7) == 0) { |
---|
1464 | cdata_close_tag = "</applet>"; |
---|
1465 | } else if (strncasecmp(html + i, "<embed", 6) == 0) { |
---|
1466 | cdata_close_tag = "</embed>"; |
---|
1467 | } else if (strncasecmp(html + i, "<frameset", 9) == 0) { |
---|
1468 | cdata_close_tag = "</frameset>"; |
---|
1469 | } else if (strncasecmp(html + i, "<frame", 6) == 0) { |
---|
1470 | cdata_close_tag = "</frame>"; |
---|
1471 | } else if (strncasecmp(html + i, "<iframe", 7) == 0) { |
---|
1472 | cdata_close_tag = "</iframe>"; |
---|
1473 | } else if (strncasecmp(html + i, "<noembed", 8) == 0) { |
---|
1474 | cdata_close_tag = "</noembed>"; |
---|
1475 | } else if (strncasecmp(html + i, "<noscript", 9) == 0) { |
---|
1476 | cdata_close_tag = "</noscript>"; |
---|
1477 | } else if (strncasecmp(html + i, "<object", 7) == 0) { |
---|
1478 | cdata_close_tag = "</object>"; |
---|
1479 | } else if (strncasecmp(html + i, "<script", 7) == 0) { |
---|
1480 | cdata_close_tag = "</script>"; |
---|
1481 | } else if (strncasecmp(html + i, "<style", 6) == 0) { |
---|
1482 | cdata_close_tag = "</style>"; |
---|
1483 | } |
---|
1484 | i = (html[k] == '<' || html[k] == '\0')? k - 1: k; |
---|
1485 | continue; |
---|
1486 | } |
---|
1487 | } else if (cdata_close_tag) { |
---|
1488 | continue; |
---|
1489 | } else if (!isspace(html[i])) { |
---|
1490 | visible = 1; |
---|
1491 | } |
---|
1492 | |
---|
1493 | if (strncmp(html+i,"&#",2)==0) { |
---|
1494 | int x = 0; |
---|
1495 | const char *w = &(html[i+2]); |
---|
1496 | while (*w == '0') {i++;w++;} |
---|
1497 | char n[5]; |
---|
1498 | if (html[i+4] && html[i+4] == ';' |
---|
1499 | && isdigit(html[i+2]) |
---|
1500 | && isdigit(html[i+3])) { |
---|
1501 | n[0] = html[i+2]; |
---|
1502 | n[1] = html[i+3]; |
---|
1503 | n[2] = 0; |
---|
1504 | x = atoi(n); |
---|
1505 | if (x <= 255 && x >= 32) |
---|
1506 | html2[j++] = x; |
---|
1507 | i += 4; |
---|
1508 | } else if (html[i+6] |
---|
1509 | && html[i+6] == ';' |
---|
1510 | && isdigit(html[i+2]) |
---|
1511 | && isdigit(html[i+3]) |
---|
1512 | && isdigit(html[i+4]) |
---|
1513 | && isdigit(html[i+5])) { |
---|
1514 | n[0] = html[i+2]; |
---|
1515 | n[1] = html[i+3]; |
---|
1516 | n[2] = html[i+4]; |
---|
1517 | n[3] = html[i+5]; |
---|
1518 | n[4] = 0; |
---|
1519 | x = atoi(n); |
---|
1520 | if (x <= 255 && x >= 32) |
---|
1521 | html2[j++] = x; |
---|
1522 | i += 6; |
---|
1523 | } else { |
---|
1524 | const char *w = &(html[i]); |
---|
1525 | while (*w != ';' && *w != ' ' && *w != '\t' && *w != '\0') {i++;w++;} |
---|
1526 | } |
---|
1527 | visible = 0; |
---|
1528 | continue; |
---|
1529 | } else if (html[i] == '&') { |
---|
1530 | int x = 0, y = 0; |
---|
1531 | for (y = 0; y < num_chars; y++) { |
---|
1532 | x = strlen(charset[y].entity); |
---|
1533 | if (strncasecmp(html+i,charset[y].entity,x)==0) { |
---|
1534 | if (charset[y].id <= 255) |
---|
1535 | html2[j++] = charset[y].id; |
---|
1536 | i += x-1; |
---|
1537 | visible = 0; |
---|
1538 | continue; |
---|
1539 | } |
---|
1540 | } |
---|
1541 | } |
---|
1542 | |
---|
1543 | if (j < len && visible) |
---|
1544 | html2[j++] = html[i]; |
---|
1545 | |
---|
1546 | if (j >= len) |
---|
1547 | i = j = len; |
---|
1548 | } |
---|
1549 | |
---|
1550 | html2[j] = '\0'; |
---|
1551 | return (char *)html2; |
---|
1552 | } |
---|