[c5c522c] | 1 | /* $Id: decode.c,v 1.395 2011/09/03 13:25:39 sbajic Exp $ */ |
---|
| 2 | |
---|
| 3 | /* |
---|
| 4 | DSPAM |
---|
| 5 | COPYRIGHT (C) 2002-2012 DSPAM PROJECT |
---|
| 6 | |
---|
| 7 | This program is free software: you can redistribute it and/or modify |
---|
| 8 | it under the terms of the GNU Affero General Public License as |
---|
| 9 | published by the Free Software Foundation, either version 3 of the |
---|
| 10 | License, or (at your option) any later version. |
---|
| 11 | |
---|
| 12 | This program is distributed in the hope that it will be useful, |
---|
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 15 | GNU Affero General Public License for more details. |
---|
| 16 | |
---|
| 17 | You should have received a copy of the GNU Affero General Public License |
---|
| 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
| 19 | |
---|
| 20 | */ |
---|
| 21 | |
---|
| 22 | /* |
---|
| 23 | * decode.c - message decoding and parsing |
---|
| 24 | * |
---|
| 25 | * DESCRIPTION |
---|
| 26 | * This set of functions performs parsing and decoding of a message and |
---|
| 27 | * embeds its components into a ds_message_t structure, suitable for |
---|
| 28 | * logical access. |
---|
| 29 | */ |
---|
| 30 | |
---|
| 31 | #ifdef HAVE_CONFIG_H |
---|
| 32 | #include <auto-config.h> |
---|
| 33 | #endif |
---|
| 34 | |
---|
| 35 | #include <stdio.h> |
---|
| 36 | #include <string.h> |
---|
| 37 | #include <stdlib.h> |
---|
| 38 | #include <ctype.h> |
---|
| 39 | |
---|
| 40 | #include "decode.h" |
---|
| 41 | #include "error.h" |
---|
| 42 | #include "util.h" |
---|
| 43 | #include "language.h" |
---|
| 44 | #include "buffer.h" |
---|
| 45 | #include "base64.h" |
---|
| 46 | #include "libdspam.h" |
---|
| 47 | |
---|
| 48 | /* |
---|
| 49 | * _ds_actualize_message (const char *message) |
---|
| 50 | * |
---|
| 51 | * DESCRIPTION |
---|
| 52 | * primary message parser |
---|
| 53 | * |
---|
| 54 | * this function performs all decoding and actualization of the message |
---|
| 55 | * into the message structures defined in the .h |
---|
| 56 | * |
---|
| 57 | * INPUT ARGUMENTS |
---|
| 58 | * message message to decode |
---|
| 59 | * |
---|
| 60 | * RETURN VALUES |
---|
| 61 | * pointer to an allocated message structure (ds_message_t), NULL on failure |
---|
| 62 | */ |
---|
| 63 | |
---|
| 64 | ds_message_t |
---|
| 65 | _ds_actualize_message (const char *message) |
---|
| 66 | { |
---|
| 67 | char *line = NULL; |
---|
| 68 | char *in = NULL; |
---|
| 69 | char *m_in = NULL; |
---|
| 70 | ds_message_part_t current_block; |
---|
| 71 | ds_header_t current_heading = NULL; |
---|
| 72 | struct nt *boundaries = NULL; |
---|
| 73 | ds_message_t out = NULL; |
---|
| 74 | int block_position = BP_HEADER; |
---|
| 75 | int in_content = 0; |
---|
| 76 | |
---|
| 77 | if (!message || !(*message)) |
---|
| 78 | goto MEMFAIL; |
---|
| 79 | |
---|
| 80 | if (!(in = strdup(message))) |
---|
| 81 | goto MEMFAIL; |
---|
| 82 | |
---|
| 83 | m_in = in; |
---|
| 84 | |
---|
| 85 | boundaries = nt_create (NT_CHAR); |
---|
| 86 | if (!boundaries) |
---|
| 87 | goto MEMFAIL; |
---|
| 88 | |
---|
| 89 | out = (ds_message_t) calloc (1, sizeof (struct _ds_message)); |
---|
| 90 | if (!out) |
---|
| 91 | goto MEMFAIL; |
---|
| 92 | |
---|
| 93 | out->components = nt_create (NT_PTR); |
---|
| 94 | if (!out->components) |
---|
| 95 | goto MEMFAIL; |
---|
| 96 | |
---|
| 97 | current_block = _ds_create_message_part (); |
---|
| 98 | if (!current_block) |
---|
| 99 | goto MEMFAIL; |
---|
| 100 | |
---|
| 101 | if (nt_add (out->components, (void *) current_block) == NULL) |
---|
| 102 | goto MEMFAIL; |
---|
| 103 | |
---|
| 104 | /* Read the message from memory */ |
---|
| 105 | |
---|
| 106 | line = strsep (&in, "\n"); |
---|
| 107 | while (line) |
---|
| 108 | { |
---|
| 109 | |
---|
| 110 | /* Header processing */ |
---|
| 111 | |
---|
| 112 | if (block_position == BP_HEADER) |
---|
| 113 | { |
---|
| 114 | |
---|
| 115 | /* If we see two boundaries converged on top of one another */ |
---|
| 116 | |
---|
| 117 | if (_ds_match_boundary (boundaries, line)) |
---|
| 118 | { |
---|
| 119 | |
---|
| 120 | /* Add the boundary as the terminating boundary */ |
---|
| 121 | |
---|
| 122 | current_block->terminating_boundary = strdup (line + 2); |
---|
| 123 | current_block->original_encoding = current_block->encoding; |
---|
| 124 | |
---|
| 125 | _ds_decode_headers(current_block); |
---|
| 126 | current_block = _ds_create_message_part (); |
---|
| 127 | |
---|
| 128 | if (!current_block) |
---|
| 129 | goto MEMFAIL; |
---|
| 130 | |
---|
| 131 | if (nt_add (out->components, (void *) current_block) == NULL) |
---|
| 132 | goto MEMFAIL; |
---|
| 133 | |
---|
| 134 | block_position = BP_HEADER; |
---|
| 135 | } |
---|
| 136 | |
---|
| 137 | /* Concatenate multiline headers to the original header field data */ |
---|
| 138 | |
---|
| 139 | else if (line[0] == 32 || line[0] == '\t') |
---|
| 140 | { |
---|
| 141 | if (current_heading) |
---|
| 142 | { |
---|
| 143 | char *eow, *ptr; |
---|
| 144 | |
---|
| 145 | ptr = realloc (current_heading->data, |
---|
| 146 | strlen (current_heading->data) + strlen (line) + 2); |
---|
| 147 | if (ptr) |
---|
| 148 | { |
---|
| 149 | current_heading->data = ptr; |
---|
| 150 | strcat (current_heading->data, "\n"); |
---|
| 151 | strcat (current_heading->data, line); |
---|
| 152 | } else { |
---|
| 153 | goto MEMFAIL; |
---|
| 154 | } |
---|
| 155 | |
---|
| 156 | /* Our concatenated data doesn't have any whitespace between lines */ |
---|
| 157 | for(eow=line;eow[0] && isspace((int) eow[0]);eow++) { } |
---|
| 158 | |
---|
| 159 | ptr = |
---|
| 160 | realloc (current_heading->concatenated_data, |
---|
| 161 | strlen (current_heading->concatenated_data) + strlen (eow) + 1); |
---|
| 162 | if (ptr) |
---|
| 163 | { |
---|
| 164 | current_heading->concatenated_data = ptr; |
---|
| 165 | strcat (current_heading->concatenated_data, eow); |
---|
| 166 | } else { |
---|
| 167 | goto MEMFAIL; |
---|
| 168 | } |
---|
| 169 | |
---|
| 170 | if (current_heading->original_data) { |
---|
| 171 | ptr = |
---|
| 172 | realloc (current_heading->original_data, |
---|
| 173 | strlen (current_heading->original_data) + |
---|
| 174 | strlen (line) + 2); |
---|
| 175 | if (ptr) { |
---|
| 176 | current_heading->original_data = ptr; |
---|
| 177 | strcat (current_heading->original_data, "\n"); |
---|
| 178 | strcat (current_heading->original_data, line); |
---|
| 179 | } else { |
---|
| 180 | goto MEMFAIL; |
---|
| 181 | } |
---|
| 182 | } |
---|
| 183 | |
---|
| 184 | _ds_analyze_header (current_block, current_heading, boundaries); |
---|
| 185 | } |
---|
| 186 | } |
---|
| 187 | |
---|
| 188 | /* New header field when LF or CRLF is not found */ |
---|
| 189 | |
---|
| 190 | else if (line[0] != 0 && line[0] != 13) |
---|
| 191 | { |
---|
| 192 | ds_header_t header = _ds_create_header_field (line); |
---|
| 193 | |
---|
| 194 | if (header != NULL) |
---|
| 195 | { |
---|
| 196 | _ds_analyze_header (current_block, header, boundaries); |
---|
| 197 | current_heading = header; |
---|
| 198 | nt_add (current_block->headers, header); |
---|
| 199 | } |
---|
| 200 | |
---|
| 201 | |
---|
| 202 | /* line[0] == 0 or line[0] == 13; LF or CRLF, switch to body */ |
---|
| 203 | |
---|
| 204 | } else { |
---|
| 205 | block_position = BP_BODY; |
---|
| 206 | } |
---|
| 207 | } |
---|
| 208 | |
---|
| 209 | /* Body processing */ |
---|
| 210 | |
---|
| 211 | else if (block_position == BP_BODY) |
---|
| 212 | { |
---|
| 213 | /* Look for a boundary in the header of a part */ |
---|
| 214 | |
---|
| 215 | if (!strncasecmp (line, "Content-Type", 12) |
---|
| 216 | || ((line[0] == 32 || line[0] == 9) && in_content)) |
---|
| 217 | { |
---|
| 218 | char boundary[128]; |
---|
| 219 | in_content = 1; |
---|
| 220 | if (!_ds_extract_boundary(boundary, sizeof(boundary), line)) { |
---|
| 221 | if (!_ds_match_boundary (boundaries, boundary)) { |
---|
| 222 | _ds_push_boundary (boundaries, boundary); |
---|
| 223 | free(current_block->boundary); |
---|
| 224 | current_block->boundary = strdup (boundary); |
---|
| 225 | } |
---|
| 226 | } else { |
---|
| 227 | _ds_push_boundary (boundaries, ""); |
---|
| 228 | } |
---|
| 229 | } else { |
---|
| 230 | in_content = 0; |
---|
| 231 | } |
---|
| 232 | |
---|
| 233 | /* Multipart boundary was reached; move onto next block */ |
---|
| 234 | |
---|
| 235 | if (_ds_match_boundary (boundaries, line)) |
---|
| 236 | { |
---|
| 237 | |
---|
| 238 | /* Add the boundary as the terminating boundary */ |
---|
| 239 | |
---|
| 240 | current_block->terminating_boundary = strdup (line + 2); |
---|
| 241 | current_block->original_encoding = current_block->encoding; |
---|
| 242 | |
---|
| 243 | _ds_decode_headers(current_block); |
---|
| 244 | current_block = _ds_create_message_part (); |
---|
| 245 | |
---|
| 246 | if (!current_block) |
---|
| 247 | goto MEMFAIL; |
---|
| 248 | |
---|
| 249 | if (nt_add (out->components, (void *) current_block) == NULL) |
---|
| 250 | goto MEMFAIL; |
---|
| 251 | |
---|
| 252 | block_position = BP_HEADER; |
---|
| 253 | } |
---|
| 254 | |
---|
| 255 | /* Plain old message (or part) body */ |
---|
| 256 | |
---|
| 257 | else { |
---|
| 258 | buffer_cat (current_block->body, line); |
---|
| 259 | |
---|
| 260 | /* Don't add extra \n at the end of message's body */ |
---|
| 261 | |
---|
| 262 | if (in != NULL) |
---|
| 263 | buffer_cat (current_block->body, "\n"); |
---|
| 264 | } |
---|
| 265 | } |
---|
| 266 | |
---|
| 267 | line = strsep (&in, "\n"); |
---|
| 268 | } /* while (line) */ |
---|
| 269 | |
---|
| 270 | _ds_decode_headers(current_block); |
---|
| 271 | |
---|
| 272 | free (m_in); |
---|
| 273 | nt_destroy (boundaries); |
---|
| 274 | return out; |
---|
| 275 | |
---|
| 276 | MEMFAIL: |
---|
| 277 | if (m_in) free(m_in); |
---|
| 278 | if (boundaries) nt_destroy (boundaries); |
---|
| 279 | if (out) _ds_destroy_message(out); |
---|
| 280 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 281 | return NULL; |
---|
| 282 | } |
---|
| 283 | |
---|
| 284 | /* |
---|
| 285 | * _ds_create_message_part |
---|
| 286 | * |
---|
| 287 | * DESCRIPTION |
---|
| 288 | * create and initialize a new message block component |
---|
| 289 | * |
---|
| 290 | * RETURN VALUES |
---|
| 291 | * pointer to an allocated message block (ds_message_part_t), NULL on failure |
---|
| 292 | * |
---|
| 293 | */ |
---|
| 294 | |
---|
| 295 | ds_message_part_t |
---|
| 296 | _ds_create_message_part (void) |
---|
| 297 | { |
---|
| 298 | ds_message_part_t block = |
---|
| 299 | (ds_message_part_t) calloc (1, sizeof (struct _ds_message_part)); |
---|
| 300 | |
---|
| 301 | if (!block) |
---|
| 302 | goto MEMFAIL; |
---|
| 303 | |
---|
| 304 | block->headers = nt_create (NT_PTR); |
---|
| 305 | if (!block->headers) |
---|
| 306 | goto MEMFAIL; |
---|
| 307 | |
---|
| 308 | block->body = buffer_create (NULL); |
---|
| 309 | if (!block->body) |
---|
| 310 | goto MEMFAIL; |
---|
| 311 | |
---|
| 312 | block->encoding = EN_UNKNOWN; |
---|
| 313 | block->media_type = MT_TEXT; |
---|
| 314 | block->media_subtype = MST_PLAIN; |
---|
| 315 | block->original_encoding = EN_UNKNOWN; |
---|
| 316 | block->content_disposition = PCD_UNKNOWN; |
---|
| 317 | |
---|
| 318 | /* Not really necessary, but.. */ |
---|
| 319 | |
---|
| 320 | block->boundary = NULL; |
---|
| 321 | block->terminating_boundary = NULL; |
---|
| 322 | block->original_signed_body = NULL; |
---|
| 323 | |
---|
| 324 | |
---|
| 325 | return block; |
---|
| 326 | |
---|
| 327 | MEMFAIL: |
---|
| 328 | if (block) { |
---|
| 329 | buffer_destroy(block->body); |
---|
| 330 | nt_destroy(block->headers); |
---|
| 331 | free(block); |
---|
| 332 | } |
---|
| 333 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 334 | return NULL; |
---|
| 335 | } |
---|
| 336 | |
---|
| 337 | /* |
---|
| 338 | * _ds_create_header_field(const char *heading) |
---|
| 339 | * |
---|
| 340 | * DESCRIPTION |
---|
| 341 | * create and initialize a new header structure |
---|
| 342 | * |
---|
| 343 | * INPUT ARGUMENTS |
---|
| 344 | * heading plain text heading (e.g. "To: Mom") |
---|
| 345 | * |
---|
| 346 | * RETURN VALUES |
---|
| 347 | * pointer to an allocated header structure (ds_header_t), NULL on failure |
---|
| 348 | */ |
---|
| 349 | |
---|
| 350 | ds_header_t |
---|
| 351 | _ds_create_header_field (const char *heading) |
---|
| 352 | { |
---|
| 353 | char *in = strdup(heading); |
---|
| 354 | char *ptr, *m = in, *data; |
---|
| 355 | ds_header_t header = |
---|
| 356 | (ds_header_t) calloc (1, sizeof (struct _ds_header_field)); |
---|
| 357 | |
---|
| 358 | if (!header || !in) |
---|
| 359 | goto MEMFAIL; |
---|
| 360 | |
---|
| 361 | ptr = strsep (&in, ":"); |
---|
| 362 | if (ptr) { |
---|
| 363 | header->heading = strdup (ptr); |
---|
| 364 | if (!header->heading) |
---|
| 365 | goto MEMFAIL; |
---|
| 366 | else |
---|
| 367 | { |
---|
| 368 | if (!in) |
---|
| 369 | { |
---|
| 370 | LOGDEBUG("%s:%u: unexpected data: header string '%s' doesn't " |
---|
| 371 | "contains `:' character", __FILE__, __LINE__, header->heading); |
---|
| 372 | |
---|
| 373 | /* Use empty string as data as fallback for comtinue processing. */ |
---|
| 374 | |
---|
| 375 | in = ""; |
---|
| 376 | } |
---|
| 377 | else |
---|
| 378 | { |
---|
| 379 | /* Skip white space */ |
---|
| 380 | while (*in == 32 || *in == 9) |
---|
| 381 | ++in; |
---|
| 382 | } |
---|
| 383 | |
---|
| 384 | data = strdup (in); |
---|
| 385 | if (!data) |
---|
| 386 | goto MEMFAIL; |
---|
| 387 | |
---|
| 388 | header->data = data; |
---|
| 389 | header->concatenated_data = strdup(data); |
---|
| 390 | } |
---|
| 391 | } |
---|
| 392 | |
---|
| 393 | free (m); |
---|
| 394 | return header; |
---|
| 395 | |
---|
| 396 | MEMFAIL: |
---|
| 397 | free(header); |
---|
| 398 | free(m); |
---|
| 399 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 400 | return NULL; |
---|
| 401 | } |
---|
| 402 | |
---|
| 403 | /* |
---|
| 404 | * _ds_decode_headers (ds_message_part_t block) |
---|
| 405 | * |
---|
| 406 | * DESCRIPTION |
---|
| 407 | * decodes in-line encoded headers |
---|
| 408 | * |
---|
| 409 | * RETURN VALUES |
---|
| 410 | * returns 0 on success |
---|
| 411 | */ |
---|
| 412 | |
---|
| 413 | int |
---|
| 414 | _ds_decode_headers (ds_message_part_t block) { |
---|
| 415 | #ifdef VERBOSE |
---|
| 416 | LOGDEBUG("decoding headers in message block"); |
---|
| 417 | #endif |
---|
| 418 | char *ptr, *dptr, *rest, *enc; |
---|
| 419 | ds_header_t header; |
---|
| 420 | struct nt_node *node_nt; |
---|
| 421 | struct nt_c c_nt; |
---|
| 422 | long decoded_len; |
---|
| 423 | |
---|
| 424 | node_nt = c_nt_first(block->headers, &c_nt); |
---|
| 425 | while(node_nt != NULL) { |
---|
| 426 | long enc_offset; |
---|
| 427 | header = (ds_header_t) node_nt->ptr; |
---|
| 428 | |
---|
| 429 | for(enc_offset = 0; header->concatenated_data[enc_offset]; enc_offset++) |
---|
| 430 | { |
---|
| 431 | enc = header->concatenated_data + enc_offset; |
---|
| 432 | |
---|
| 433 | if (!strncmp(enc, "=?", 2)) { |
---|
| 434 | int was_null = 0; |
---|
| 435 | char *ptrptr, *decoded = NULL; |
---|
| 436 | long offset = (long) enc - (long) header->concatenated_data; |
---|
| 437 | |
---|
| 438 | if (header->original_data == NULL) { |
---|
| 439 | header->original_data = strdup(header->data); |
---|
| 440 | was_null = 1; |
---|
| 441 | } |
---|
| 442 | |
---|
| 443 | strtok_r (enc, "?", &ptrptr); |
---|
| 444 | strtok_r (NULL, "?", &ptrptr); |
---|
| 445 | ptr = strtok_r (NULL, "?", &ptrptr); |
---|
| 446 | dptr = strtok_r (NULL, "?", &ptrptr); |
---|
| 447 | if (!dptr) { |
---|
| 448 | if (was_null && header->original_data != NULL) |
---|
| 449 | free(header->original_data); |
---|
| 450 | if (was_null) |
---|
| 451 | header->original_data = NULL; |
---|
| 452 | continue; |
---|
| 453 | } |
---|
| 454 | |
---|
| 455 | rest = dptr + strlen (dptr); |
---|
| 456 | if (rest[0]!=0) { |
---|
| 457 | rest++; |
---|
| 458 | if (rest[0]!=0) rest++; |
---|
| 459 | } |
---|
| 460 | |
---|
| 461 | if (ptr != NULL && (ptr[0] == 'b' || ptr[0] == 'B')) |
---|
| 462 | decoded = _ds_decode_base64 (dptr); |
---|
| 463 | else if (ptr != NULL && (ptr[0] == 'q' || ptr[0] == 'Q')) |
---|
| 464 | decoded = _ds_decode_quoted (dptr); |
---|
| 465 | |
---|
| 466 | decoded_len = 0; |
---|
| 467 | |
---|
| 468 | /* Append the rest of the message */ |
---|
| 469 | |
---|
| 470 | if (decoded) |
---|
| 471 | { |
---|
| 472 | char *new_alloc; |
---|
| 473 | |
---|
| 474 | decoded_len = strlen(decoded); |
---|
| 475 | new_alloc = calloc (1, offset + decoded_len + strlen (rest) + 2); |
---|
| 476 | if (new_alloc == NULL) { |
---|
| 477 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 478 | } |
---|
| 479 | else |
---|
| 480 | { |
---|
| 481 | if (offset) |
---|
| 482 | strncpy(new_alloc, header->concatenated_data, offset); |
---|
| 483 | |
---|
| 484 | strcat(new_alloc, decoded); |
---|
| 485 | strcat(new_alloc, rest); |
---|
| 486 | free(decoded); |
---|
| 487 | decoded = new_alloc; |
---|
| 488 | } |
---|
| 489 | } |
---|
| 490 | |
---|
| 491 | if (decoded) { |
---|
| 492 | enc_offset += (decoded_len-1); |
---|
| 493 | free(header->concatenated_data); |
---|
| 494 | header->concatenated_data = decoded; |
---|
| 495 | } |
---|
| 496 | else if (was_null && header->original_data) { |
---|
| 497 | free(header->original_data); |
---|
| 498 | header->original_data = NULL; |
---|
| 499 | } |
---|
| 500 | else if (was_null) { |
---|
| 501 | header->original_data = NULL; |
---|
| 502 | } |
---|
| 503 | } |
---|
| 504 | } |
---|
| 505 | |
---|
| 506 | if (header->original_data != NULL) { |
---|
| 507 | free(header->data); |
---|
| 508 | header->data = strdup(header->concatenated_data); |
---|
| 509 | } |
---|
| 510 | |
---|
| 511 | node_nt = c_nt_next(block->headers, &c_nt); |
---|
| 512 | } |
---|
| 513 | |
---|
| 514 | return 0; |
---|
| 515 | } |
---|
| 516 | |
---|
| 517 | /* |
---|
| 518 | * _ds_analyze_header (ds_message_part_t block, ds_header_t header, |
---|
| 519 | * struct nt *boundaries) |
---|
| 520 | * |
---|
| 521 | * DESCRIPTION |
---|
| 522 | * analyzes the header passed in and performs various operations including: |
---|
| 523 | * - setting media type and subtype |
---|
| 524 | * - setting transfer encoding |
---|
| 525 | * - adding newly discovered boundaries |
---|
| 526 | * |
---|
| 527 | * based on the heading specified. essentially all headers should be |
---|
| 528 | * analyzed for future expansion |
---|
| 529 | * |
---|
| 530 | * INPUT ARGUMENTS |
---|
| 531 | * block the message block to which the header belongs |
---|
| 532 | * header the header to analyze |
---|
| 533 | * boundaries a list of known boundaries found within the block |
---|
| 534 | */ |
---|
| 535 | |
---|
| 536 | void |
---|
| 537 | _ds_analyze_header ( |
---|
| 538 | ds_message_part_t block, |
---|
| 539 | ds_header_t header, |
---|
| 540 | struct nt *boundaries) |
---|
| 541 | { |
---|
| 542 | if (!header || !block || !header->data) |
---|
| 543 | return; |
---|
| 544 | |
---|
| 545 | /* Content-Type header */ |
---|
| 546 | |
---|
| 547 | if (!strcasecmp (header->heading, "Content-Type")) |
---|
| 548 | { |
---|
| 549 | int len = strlen(header->data); |
---|
| 550 | if (!strncasecmp (header->data, "text", 4)) { |
---|
| 551 | block->media_type = MT_TEXT; |
---|
| 552 | if (len >= 5 && !strncasecmp (header->data + 5, "plain", 5)) |
---|
| 553 | block->media_subtype = MST_PLAIN; |
---|
| 554 | else if (len >= 5 && !strncasecmp (header->data + 5, "html", 4)) |
---|
| 555 | block->media_subtype = MST_HTML; |
---|
| 556 | else |
---|
| 557 | block->media_subtype = MST_OTHER; |
---|
| 558 | } |
---|
| 559 | |
---|
| 560 | else if (!strncasecmp (header->data, "application", 11)) |
---|
| 561 | { |
---|
| 562 | block->media_type = MT_APPLICATION; |
---|
| 563 | if (len >= 12 && !strncasecmp (header->data + 12, "dspam-signature", 15)) |
---|
| 564 | block->media_subtype = MST_DSPAM_SIGNATURE; |
---|
| 565 | else |
---|
| 566 | block->media_subtype = MST_OTHER; |
---|
| 567 | } |
---|
| 568 | |
---|
| 569 | else if (!strncasecmp (header->data, "message", 7)) |
---|
| 570 | { |
---|
| 571 | block->media_type = MT_MESSAGE; |
---|
| 572 | if (len >= 8 && !strncasecmp (header->data + 8, "rfc822", 6)) |
---|
| 573 | block->media_subtype = MST_RFC822; |
---|
| 574 | else if (len >= 8 && !strncasecmp (header->data + 8, "inoculation", 11)) |
---|
| 575 | block->media_subtype = MST_INOCULATION; |
---|
| 576 | else |
---|
| 577 | block->media_subtype = MST_OTHER; |
---|
| 578 | } |
---|
| 579 | |
---|
| 580 | else if (!strncasecmp (header->data, "multipart", 9)) |
---|
| 581 | { |
---|
| 582 | char boundary[128]; |
---|
| 583 | |
---|
| 584 | block->media_type = MT_MULTIPART; |
---|
| 585 | if (len >= 10 && !strncasecmp (header->data + 10, "mixed", 5)) |
---|
| 586 | block->media_subtype = MST_MIXED; |
---|
| 587 | else if (len >= 10 && !strncasecmp (header->data + 10, "alternative", 11)) |
---|
| 588 | block->media_subtype = MST_ALTERNATIVE; |
---|
| 589 | else if (len >= 10 && !strncasecmp (header->data + 10, "signed", 6)) |
---|
| 590 | block->media_subtype = MST_SIGNED; |
---|
| 591 | else if (len >= 10 && !strncasecmp (header->data + 10, "encrypted", 9)) |
---|
| 592 | block->media_subtype = MST_ENCRYPTED; |
---|
| 593 | else |
---|
| 594 | block->media_subtype = MST_OTHER; |
---|
| 595 | |
---|
| 596 | if (!_ds_extract_boundary(boundary, sizeof(boundary), header->data)) { |
---|
| 597 | if (!_ds_match_boundary (boundaries, boundary)) { |
---|
| 598 | _ds_push_boundary (boundaries, boundary); |
---|
| 599 | free(block->boundary); |
---|
| 600 | block->boundary = strdup (boundary); |
---|
| 601 | } |
---|
| 602 | } else { |
---|
| 603 | _ds_push_boundary (boundaries, ""); |
---|
| 604 | } |
---|
| 605 | } |
---|
| 606 | else { |
---|
| 607 | block->media_type = MT_OTHER; |
---|
| 608 | block->media_subtype = MST_OTHER; |
---|
| 609 | } |
---|
| 610 | |
---|
| 611 | } |
---|
| 612 | |
---|
| 613 | /* Content-Transfer-Encoding */ |
---|
| 614 | |
---|
| 615 | else if (!strcasecmp (header->heading, "Content-Transfer-Encoding")) |
---|
| 616 | { |
---|
| 617 | if (!strncasecmp (header->data, "7bit", 4)) |
---|
| 618 | block->encoding = EN_7BIT; |
---|
| 619 | else if (!strncasecmp (header->data, "8bit", 4)) |
---|
| 620 | block->encoding = EN_8BIT; |
---|
| 621 | else if (!strncasecmp (header->data, "quoted-printable", 16)) |
---|
| 622 | block->encoding = EN_QUOTED_PRINTABLE; |
---|
| 623 | else if (!strncasecmp (header->data, "base64", 6)) |
---|
| 624 | block->encoding = EN_BASE64; |
---|
| 625 | else if (!strncasecmp (header->data, "binary", 6)) |
---|
| 626 | block->encoding = EN_BINARY; |
---|
| 627 | else |
---|
| 628 | block->encoding = EN_OTHER; |
---|
| 629 | } |
---|
| 630 | |
---|
| 631 | if (!strcasecmp (header->heading, "Content-Disposition")) |
---|
| 632 | { |
---|
| 633 | if (!strncasecmp (header->data, "inline", 6)) |
---|
| 634 | block->content_disposition = PCD_INLINE; |
---|
| 635 | else if (!strncasecmp (header->data, "attachment", 10)) |
---|
| 636 | block->content_disposition = PCD_ATTACHMENT; |
---|
| 637 | else |
---|
| 638 | block->content_disposition = PCD_OTHER; |
---|
| 639 | } |
---|
| 640 | |
---|
| 641 | return; |
---|
| 642 | } |
---|
| 643 | |
---|
| 644 | /* |
---|
| 645 | * _ds_destroy_message (ds_message_t message) |
---|
| 646 | * |
---|
| 647 | * DESCRIPTION |
---|
| 648 | * destroys a message structure (ds_message_t) |
---|
| 649 | * |
---|
| 650 | * INPUT ARGUMENTS |
---|
| 651 | * message the message structure to be destroyed |
---|
| 652 | */ |
---|
| 653 | |
---|
| 654 | void |
---|
| 655 | _ds_destroy_message (ds_message_t message) |
---|
| 656 | { |
---|
| 657 | struct nt_node *node_nt; |
---|
| 658 | struct nt_c c; |
---|
| 659 | |
---|
| 660 | if (message == NULL) |
---|
| 661 | return; |
---|
| 662 | |
---|
| 663 | if (message->components) { |
---|
| 664 | node_nt = c_nt_first (message->components, &c); |
---|
| 665 | while (node_nt != NULL) |
---|
| 666 | { |
---|
| 667 | ds_message_part_t block = (ds_message_part_t) node_nt->ptr; |
---|
| 668 | _ds_destroy_block(block); |
---|
| 669 | node_nt = c_nt_next (message->components, &c); |
---|
| 670 | } |
---|
| 671 | nt_destroy (message->components); |
---|
| 672 | } |
---|
| 673 | free (message); |
---|
| 674 | return; |
---|
| 675 | } |
---|
| 676 | |
---|
| 677 | /* |
---|
| 678 | * _ds_destroy_headers (ds_message_part_t block) |
---|
| 679 | * |
---|
| 680 | * DESCRIPTION |
---|
| 681 | * destroys a message block's header pairs |
---|
| 682 | * does not free the structures themselves; these are freed at nt_destroy |
---|
| 683 | * |
---|
| 684 | * INPUT ARGUMENTS |
---|
| 685 | * block the message block containing the headers to destsroy |
---|
| 686 | */ |
---|
| 687 | |
---|
| 688 | void |
---|
| 689 | _ds_destroy_headers (ds_message_part_t block) |
---|
| 690 | { |
---|
| 691 | struct nt_node *node_nt; |
---|
| 692 | struct nt_c c; |
---|
| 693 | |
---|
| 694 | if (!block || !block->headers) |
---|
| 695 | return; |
---|
| 696 | |
---|
| 697 | node_nt = c_nt_first (block->headers, &c); |
---|
| 698 | while (node_nt != NULL) |
---|
| 699 | { |
---|
| 700 | ds_header_t field = (ds_header_t) node_nt->ptr; |
---|
| 701 | |
---|
| 702 | if (field) |
---|
| 703 | { |
---|
| 704 | free (field->original_data); |
---|
| 705 | free (field->heading); |
---|
| 706 | free (field->concatenated_data); |
---|
| 707 | free (field->data); |
---|
| 708 | } |
---|
| 709 | node_nt = c_nt_next (block->headers, &c); |
---|
| 710 | } |
---|
| 711 | |
---|
| 712 | return; |
---|
| 713 | } |
---|
| 714 | |
---|
| 715 | /* |
---|
| 716 | * _ds_destroy_block (ds_message_part_t block) |
---|
| 717 | * |
---|
| 718 | * DESCRIPTION |
---|
| 719 | * destroys a message block |
---|
| 720 | * |
---|
| 721 | * INPUT ARGUMENTS |
---|
| 722 | * block the message block to destroy |
---|
| 723 | */ |
---|
| 724 | |
---|
| 725 | void |
---|
| 726 | _ds_destroy_block (ds_message_part_t block) |
---|
| 727 | { |
---|
| 728 | if (!block) |
---|
| 729 | return; |
---|
| 730 | |
---|
| 731 | if (block->headers) |
---|
| 732 | { |
---|
| 733 | _ds_destroy_headers (block); |
---|
| 734 | nt_destroy (block->headers); |
---|
| 735 | } |
---|
| 736 | buffer_destroy (block->body); |
---|
| 737 | buffer_destroy (block->original_signed_body); |
---|
| 738 | free (block->boundary); |
---|
| 739 | free (block->terminating_boundary); |
---|
| 740 | // free (block); |
---|
| 741 | return; |
---|
| 742 | } |
---|
| 743 | |
---|
| 744 | /* |
---|
| 745 | * _ds_decode_block (ds_message_part_t block) |
---|
| 746 | * |
---|
| 747 | * DESCRIPTION |
---|
| 748 | * decodes a message block |
---|
| 749 | * |
---|
| 750 | * INPUT ARGUMENTS |
---|
| 751 | * block the message block to decode |
---|
| 752 | * |
---|
| 753 | * RETURN VALUES |
---|
| 754 | * a pointer to the allocated character array containing the decoded message |
---|
| 755 | * NULL on failure |
---|
| 756 | */ |
---|
| 757 | |
---|
| 758 | char * |
---|
| 759 | _ds_decode_block (ds_message_part_t block) |
---|
| 760 | { |
---|
| 761 | if (block->encoding == EN_BASE64) |
---|
| 762 | return _ds_decode_base64 (block->body->data); |
---|
| 763 | else if (block->encoding == EN_QUOTED_PRINTABLE) |
---|
| 764 | return _ds_decode_quoted (block->body->data); |
---|
| 765 | |
---|
| 766 | LOG (LOG_WARNING, "decoding of block encoding type %d not supported", |
---|
| 767 | block->encoding); |
---|
| 768 | return NULL; |
---|
| 769 | } |
---|
| 770 | |
---|
| 771 | /* |
---|
| 772 | * _ds_decode_{base64,quoted,hex8bit} |
---|
| 773 | * |
---|
| 774 | * DESCRIPTION |
---|
| 775 | * supporting block decoder functions |
---|
| 776 | * these function call (or perform) specific decoding functions |
---|
| 777 | * |
---|
| 778 | * INPUT ARGUMENTS |
---|
| 779 | * body encoded message body |
---|
| 780 | * |
---|
| 781 | * RETURN VALUES |
---|
| 782 | * a pointer to the allocated character array containing the decoded body |
---|
| 783 | */ |
---|
| 784 | |
---|
/* base64 decoder: hands a non-NULL body to base64decode(); NULL yields NULL */

char *
_ds_decode_base64 (const char *body)
{
  return (body != NULL) ? base64decode (body) : NULL;
}
---|
| 793 | |
---|
| 794 | char * |
---|
| 795 | _ds_decode_quoted (const char *body) |
---|
| 796 | { |
---|
| 797 | #ifdef VERBOSE |
---|
| 798 | LOGDEBUG("decoding Quoted Printable encoded buffer"); |
---|
| 799 | #endif |
---|
| 800 | if (!body) |
---|
| 801 | return NULL; |
---|
| 802 | |
---|
| 803 | char *n, *out; |
---|
| 804 | const char *end, *p; |
---|
| 805 | |
---|
| 806 | n = out = malloc(strlen(body)+1); |
---|
| 807 | end = body + strlen(body); |
---|
| 808 | |
---|
| 809 | if (out == NULL) { |
---|
| 810 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 811 | return NULL; |
---|
| 812 | } |
---|
| 813 | |
---|
| 814 | for (p = body; p < end; p++, n++) { |
---|
| 815 | if (*p == '=') { |
---|
| 816 | if (p[1] == '\r' && p[2] == '\n') { |
---|
| 817 | n -= 1; |
---|
| 818 | p += 2; |
---|
| 819 | } else if (p[1] == '\n') { |
---|
| 820 | n -= 1; |
---|
| 821 | p += 1; |
---|
| 822 | } else if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) { |
---|
| 823 | *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2])); |
---|
| 824 | p += 2; |
---|
| 825 | } else |
---|
| 826 | *n = *p; |
---|
| 827 | } else |
---|
| 828 | *n = *p; |
---|
| 829 | } |
---|
| 830 | |
---|
| 831 | *n = '\0'; |
---|
| 832 | return (char *)out; |
---|
| 833 | } |
---|
| 834 | |
---|
| 835 | char * |
---|
| 836 | _ds_decode_hex8bit (const char *body) |
---|
| 837 | { |
---|
| 838 | #ifdef VERBOSE |
---|
| 839 | LOGDEBUG("decoding hexadecimal 8-bit encodings in message block"); |
---|
| 840 | #endif |
---|
| 841 | if (!body) |
---|
| 842 | return NULL; |
---|
| 843 | |
---|
| 844 | char *n, *out; |
---|
| 845 | const char *end, *p; |
---|
| 846 | |
---|
| 847 | n = out = malloc(strlen(body)+1); |
---|
| 848 | end = body + strlen(body); |
---|
| 849 | |
---|
| 850 | if (out == NULL) { |
---|
| 851 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 852 | return NULL; |
---|
| 853 | } |
---|
| 854 | |
---|
| 855 | for (p = body; p < end; p++, n++) { |
---|
| 856 | if (*p == '%') |
---|
| 857 | if (p[1] && p[2] && isxdigit((unsigned char) p[1]) && isxdigit((unsigned char) p[2])) { |
---|
| 858 | *n = ((_ds_hex2dec((unsigned char) p[1])) << 4) | (_ds_hex2dec((unsigned char) p[2])); |
---|
| 859 | p += 2; |
---|
| 860 | } else |
---|
| 861 | *n = *p; |
---|
| 862 | else |
---|
| 863 | *n = *p; |
---|
| 864 | } |
---|
| 865 | |
---|
| 866 | *n = '\0'; |
---|
| 867 | return (char *)out; |
---|
| 868 | } |
---|
| 869 | |
---|
| 870 | /* |
---|
| 871 | * _ds_encode_block (ds_message_part_t block, int encoding) |
---|
| 872 | * |
---|
| 873 | * DESCRIPTION |
---|
| 874 | * encodes a message block using the encoding specified and replaces the |
---|
| 875 | * block's message body with the encoded data |
---|
| 876 | * |
---|
| 877 | * INPUT ARGUMENTS |
---|
| 878 | * block the message block to encode |
---|
| 879 | * encoding encoding to use (EN_) |
---|
| 880 | * |
---|
| 881 | * RETURN VALUES |
---|
| 882 | * returns 0 on success |
---|
| 883 | */ |
---|
| 884 | |
---|
| 885 | int |
---|
| 886 | _ds_encode_block (ds_message_part_t block, int encoding) |
---|
| 887 | { |
---|
| 888 | /* we can't encode a block with the same encoding */ |
---|
| 889 | |
---|
| 890 | if (block->encoding == encoding) |
---|
| 891 | return EINVAL; |
---|
| 892 | |
---|
| 893 | /* we can't encode a block that's already encoded */ |
---|
| 894 | |
---|
| 895 | if (block->encoding == EN_BASE64 || block->encoding == EN_QUOTED_PRINTABLE) |
---|
| 896 | return EFAILURE; |
---|
| 897 | |
---|
| 898 | if (encoding == EN_BASE64) { |
---|
| 899 | char *encoded = _ds_encode_base64 (block->body->data); |
---|
| 900 | buffer_destroy (block->body); |
---|
| 901 | block->body = buffer_create (encoded); |
---|
| 902 | free (encoded); |
---|
| 903 | block->encoding = EN_BASE64; |
---|
| 904 | } |
---|
| 905 | else if (encoding == EN_QUOTED_PRINTABLE) { |
---|
| 906 | |
---|
| 907 | /* TODO */ |
---|
| 908 | |
---|
| 909 | return 0; |
---|
| 910 | } |
---|
| 911 | |
---|
| 912 | LOGDEBUG("unsupported encoding: %d", encoding); |
---|
| 913 | return 0; |
---|
| 914 | } |
---|
| 915 | |
---|
/*
 * _ds_encode_{base64,quoted}
 *
 * DESCRIPTION
 *   supporting block encoder functions
 *   these function call (or perform) specific encoding functions
 *
 * INPUT ARGUMENTS
 *   body       decoded message body (NUL-terminated); may be NULL
 *
 * RETURN VALUES
 *   a pointer to the allocated character array containing the encoded body,
 *   or NULL if body is NULL or the encoder returned NULL
 */

char *
_ds_encode_base64 (const char *body)
{
  /* Mirror the decoders in this file, which return NULL for a NULL body */
  if (body == NULL)
    return NULL;

  return base64encode (body);
}
---|
| 935 | |
---|
| 936 | /* |
---|
| 937 | * _ds_assemble_message (ds_message_t message) |
---|
| 938 | * |
---|
| 939 | * DESCRIPTION |
---|
| 940 | * assembles a message structure into a flat text message |
---|
| 941 | * |
---|
| 942 | * INPUT ARGUMENTS |
---|
| 943 | * message the message structure (ds_message_t) to assemble |
---|
| 944 | * |
---|
| 945 | * RETURN VALUES |
---|
| 946 | * a pointer to the allocated character array containing the text message |
---|
| 947 | */ |
---|
| 948 | |
---|
| 949 | char * |
---|
| 950 | _ds_assemble_message (ds_message_t message, const char *newline) |
---|
| 951 | { |
---|
| 952 | buffer *out = buffer_create (NULL); |
---|
| 953 | struct nt_node *node_nt, *node_header; |
---|
| 954 | struct nt_c c_nt, c_nt2; |
---|
| 955 | char *heading; |
---|
| 956 | char *copyback; |
---|
| 957 | #ifdef VERBOSE |
---|
| 958 | int i = 0; |
---|
| 959 | #endif |
---|
| 960 | |
---|
| 961 | if (!out) { |
---|
| 962 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 963 | return NULL; |
---|
| 964 | } |
---|
| 965 | |
---|
| 966 | node_nt = c_nt_first (message->components, &c_nt); |
---|
| 967 | while (node_nt != NULL && node_nt->ptr != NULL) |
---|
| 968 | { |
---|
| 969 | ds_message_part_t block = |
---|
| 970 | (ds_message_part_t) node_nt->ptr; |
---|
| 971 | #ifdef VERBOSE |
---|
| 972 | LOGDEBUG ("assembling component %d", i); |
---|
| 973 | #endif |
---|
| 974 | |
---|
| 975 | /* Assemble headers */ |
---|
| 976 | |
---|
| 977 | if (block->headers != NULL && block->headers->items > 0) |
---|
| 978 | { |
---|
| 979 | node_header = c_nt_first (block->headers, &c_nt2); |
---|
| 980 | while (node_header != NULL) |
---|
| 981 | { |
---|
| 982 | char *data; |
---|
| 983 | ds_header_t current_header = |
---|
| 984 | (ds_header_t) node_header->ptr; |
---|
| 985 | |
---|
| 986 | data = (current_header->original_data == NULL) ? current_header->data : |
---|
| 987 | current_header->original_data; |
---|
| 988 | |
---|
| 989 | heading = malloc( |
---|
| 990 | ((current_header->heading) ? strlen(current_header->heading) : 0) |
---|
| 991 | + ((data) ? strlen(data) : 0) |
---|
| 992 | + 3 + strlen(newline)); |
---|
| 993 | |
---|
| 994 | if (current_header->heading != NULL && |
---|
| 995 | (!strncmp (current_header->heading, "From ", 5) || |
---|
| 996 | !strncmp (current_header->heading, "--", 2))) |
---|
| 997 | sprintf (heading, "%s:%s%s", |
---|
| 998 | (current_header->heading) ? current_header->heading : "", |
---|
| 999 | (data) ? data : "", newline); |
---|
| 1000 | else |
---|
| 1001 | sprintf (heading, "%s: %s%s", |
---|
| 1002 | (current_header->heading) ? current_header->heading : "", |
---|
| 1003 | (data) ? data : "", newline); |
---|
| 1004 | |
---|
| 1005 | buffer_cat (out, heading); |
---|
| 1006 | free(heading); |
---|
| 1007 | node_header = c_nt_next (block->headers, &c_nt2); |
---|
| 1008 | } |
---|
| 1009 | } |
---|
| 1010 | |
---|
| 1011 | buffer_cat (out, newline); |
---|
| 1012 | |
---|
| 1013 | /* Assemble bodies */ |
---|
| 1014 | |
---|
| 1015 | if (block->original_signed_body != NULL && message->protect) |
---|
| 1016 | buffer_cat (out, block->original_signed_body->data); |
---|
| 1017 | else |
---|
| 1018 | buffer_cat (out, block->body->data); |
---|
| 1019 | |
---|
| 1020 | if (block->terminating_boundary != NULL) |
---|
| 1021 | { |
---|
| 1022 | buffer_cat (out, "--"); |
---|
| 1023 | buffer_cat (out, block->terminating_boundary); |
---|
| 1024 | } |
---|
| 1025 | |
---|
| 1026 | node_nt = c_nt_next (message->components, &c_nt); |
---|
| 1027 | #ifdef VERBOSE |
---|
| 1028 | i++; |
---|
| 1029 | #endif |
---|
| 1030 | |
---|
| 1031 | if (node_nt != NULL && node_nt->ptr != NULL) |
---|
| 1032 | buffer_cat (out, newline); |
---|
| 1033 | } |
---|
| 1034 | |
---|
| 1035 | copyback = out->data; |
---|
| 1036 | out->data = NULL; |
---|
| 1037 | buffer_destroy (out); |
---|
| 1038 | return copyback; |
---|
| 1039 | } |
---|
| 1040 | |
---|
| 1041 | /* |
---|
| 1042 | * _ds_{push,pop,match,extract}_boundary |
---|
| 1043 | * |
---|
| 1044 | * DESCRIPTION |
---|
| 1045 | * these functions maintain and service a boundary "stack" on the message |
---|
| 1046 | */ |
---|
| 1047 | |
---|
| 1048 | int |
---|
| 1049 | _ds_push_boundary (struct nt *stack, const char *boundary) |
---|
| 1050 | { |
---|
| 1051 | char *y; |
---|
| 1052 | |
---|
| 1053 | if (boundary == NULL || boundary[0] == 0) |
---|
| 1054 | return EINVAL; |
---|
| 1055 | |
---|
| 1056 | y = malloc (strlen (boundary) + 3); |
---|
| 1057 | if (y == NULL) |
---|
| 1058 | return EUNKNOWN; |
---|
| 1059 | |
---|
| 1060 | sprintf (y, "--%s", boundary); |
---|
| 1061 | nt_add (stack, (char *) y); |
---|
| 1062 | free(y); |
---|
| 1063 | |
---|
| 1064 | return 0; |
---|
| 1065 | } |
---|
| 1066 | |
---|
| 1067 | char * |
---|
| 1068 | _ds_pop_boundary (struct nt *stack) |
---|
| 1069 | { |
---|
| 1070 | struct nt_node *node, *last_node = NULL, *parent_node = NULL; |
---|
| 1071 | struct nt_c c; |
---|
| 1072 | char *boundary = NULL; |
---|
| 1073 | |
---|
| 1074 | node = c_nt_first (stack, &c); |
---|
| 1075 | while (node != NULL) |
---|
| 1076 | { |
---|
| 1077 | parent_node = last_node; |
---|
| 1078 | last_node = node; |
---|
| 1079 | node = c_nt_next (stack, &c); |
---|
| 1080 | } |
---|
| 1081 | if (parent_node != NULL) |
---|
| 1082 | parent_node->next = NULL; |
---|
| 1083 | else |
---|
| 1084 | stack->first = NULL; |
---|
| 1085 | |
---|
| 1086 | if (last_node == NULL) |
---|
| 1087 | return NULL; |
---|
| 1088 | |
---|
| 1089 | boundary = strdup (last_node->ptr); |
---|
| 1090 | |
---|
| 1091 | free (last_node->ptr); |
---|
| 1092 | free (last_node); |
---|
| 1093 | |
---|
| 1094 | return boundary; |
---|
| 1095 | } |
---|
| 1096 | |
---|
| 1097 | int |
---|
| 1098 | _ds_match_boundary (struct nt *stack, const char *buff) |
---|
| 1099 | { |
---|
| 1100 | struct nt_node *node; |
---|
| 1101 | struct nt_c c; |
---|
| 1102 | |
---|
| 1103 | node = c_nt_first (stack, &c); |
---|
| 1104 | while (node != NULL) |
---|
| 1105 | { |
---|
| 1106 | if (!strncmp (buff, node->ptr, strlen (node->ptr))) |
---|
| 1107 | { |
---|
| 1108 | return 1; |
---|
| 1109 | } |
---|
| 1110 | node = c_nt_next (stack, &c); |
---|
| 1111 | } |
---|
| 1112 | return 0; |
---|
| 1113 | } |
---|
| 1114 | |
---|
| 1115 | int |
---|
| 1116 | _ds_extract_boundary (char *buf, size_t size, char *mem) |
---|
| 1117 | { |
---|
| 1118 | char *data, *ptr, *ptrptr; |
---|
| 1119 | |
---|
| 1120 | if (mem == NULL) |
---|
| 1121 | return EINVAL; |
---|
| 1122 | |
---|
| 1123 | data = strdup(mem); |
---|
| 1124 | if (data == NULL) { |
---|
| 1125 | LOG(LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 1126 | return EUNKNOWN; |
---|
| 1127 | } |
---|
| 1128 | |
---|
| 1129 | for(ptr=data;ptr<(data+strlen(data));ptr++) { |
---|
| 1130 | if (!strncasecmp(ptr, "boundary", 8)) { |
---|
| 1131 | ptr = strchr(ptr, '='); |
---|
| 1132 | if (ptr == NULL) { |
---|
| 1133 | free(data); |
---|
| 1134 | return EFAILURE; |
---|
| 1135 | } |
---|
| 1136 | ptr++; |
---|
| 1137 | while(isspace((int) ptr[0])) |
---|
| 1138 | ptr++; |
---|
| 1139 | if (ptr[0] == '"') |
---|
| 1140 | ptr++; |
---|
| 1141 | strtok_r(ptr, " \";\n\t", &ptrptr); |
---|
| 1142 | strlcpy(buf, ptr, size); |
---|
| 1143 | free(data); |
---|
| 1144 | return 0; |
---|
| 1145 | } |
---|
| 1146 | } |
---|
| 1147 | |
---|
| 1148 | free(data); |
---|
| 1149 | return EFAILURE; |
---|
| 1150 | } |
---|
| 1151 | |
---|
| 1152 | /* |
---|
| 1153 | * _ds_find_header (ds_message_t message, consr char *heading) { |
---|
| 1154 | * |
---|
| 1155 | * DESCRIPTION |
---|
| 1156 | * finds a header and returns its value |
---|
| 1157 | * |
---|
| 1158 | * INPUT ARGUMENTS |
---|
| 1159 | * message the message structure to search |
---|
| 1160 | * heading the heading to search for |
---|
| 1161 | * flags optional search flags |
---|
| 1162 | * |
---|
| 1163 | * RETURN VALUES |
---|
| 1164 | * a pointer to the header structure's value |
---|
| 1165 | * |
---|
| 1166 | */ |
---|
| 1167 | |
---|
| 1168 | char * |
---|
| 1169 | _ds_find_header (ds_message_t message, const char *heading) { |
---|
| 1170 | ds_message_part_t block; |
---|
| 1171 | ds_header_t head; |
---|
| 1172 | struct nt_node *node_nt; |
---|
| 1173 | |
---|
| 1174 | if (message->components->first) { |
---|
| 1175 | if ((block = message->components->first->ptr)==NULL) |
---|
| 1176 | return NULL; |
---|
| 1177 | if (block->headers == NULL) |
---|
| 1178 | return NULL; |
---|
| 1179 | } else { |
---|
| 1180 | return NULL; |
---|
| 1181 | } |
---|
| 1182 | |
---|
| 1183 | node_nt = block->headers->first; |
---|
| 1184 | while(node_nt != NULL) { |
---|
| 1185 | head = (ds_header_t) node_nt->ptr; |
---|
| 1186 | if (head && !strcasecmp(head->heading, heading)) { |
---|
| 1187 | return head->data; |
---|
| 1188 | } |
---|
| 1189 | node_nt = node_nt->next; |
---|
| 1190 | } |
---|
| 1191 | |
---|
| 1192 | return NULL; |
---|
| 1193 | } |
---|
| 1194 | |
---|
/*
 * Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
 * to its numeric value 0-15. Returns -1 for any other character.
 */
int _ds_hex2dec(unsigned char hex) {
  if (hex >= '0' && hex <= '9')
    return hex - '0';

  if (hex >= 'a' && hex <= 'f')
    return 10 + (hex - 'a');

  if (hex >= 'A' && hex <= 'F')
    return 10 + (hex - 'A');

  return -1;
}
---|
| 1216 | |
---|
| 1217 | /* |
---|
| 1218 | * _ds_strip_html(const char *html) |
---|
| 1219 | * |
---|
| 1220 | * DESCRIPTION |
---|
| 1221 | * strip html tags from the supplied message |
---|
| 1222 | * |
---|
| 1223 | * INPUT ARGUMENTS |
---|
| 1224 | * html encoded message body |
---|
| 1225 | * |
---|
| 1226 | * RETURN VALUES |
---|
| 1227 | * a pointer to the allocated character array containing the |
---|
| 1228 | * stripped message |
---|
| 1229 | * |
---|
| 1230 | */ |
---|
| 1231 | |
---|
| 1232 | char * |
---|
| 1233 | _ds_strip_html (const char *html) |
---|
| 1234 | { |
---|
| 1235 | #ifdef VERBOSE |
---|
| 1236 | LOGDEBUG("stripping HTML tags from message block"); |
---|
| 1237 | #endif |
---|
| 1238 | size_t j = 0, k = 0, i = 0; |
---|
| 1239 | int visible = 1; |
---|
| 1240 | int closing_td_tag = 0; |
---|
| 1241 | char *html2; |
---|
| 1242 | const char *cdata_close_tag = NULL; |
---|
| 1243 | |
---|
| 1244 | if(!html) |
---|
| 1245 | return NULL; |
---|
| 1246 | |
---|
| 1247 | static struct { |
---|
| 1248 | unsigned int id; |
---|
| 1249 | char *entity; |
---|
| 1250 | } |
---|
| 1251 | charset[] = { |
---|
| 1252 | { 32, " " }, { 34, """ }, { 34, """ }, { 38, "&" }, |
---|
| 1253 | { 38, "&" }, { 39, "'" }, { 60, "<" }, { 60, "<" }, |
---|
| 1254 | { 62, ">" }, { 62, ">" }, { 160, " " }, { 161, "¡" }, |
---|
| 1255 | { 162, "¢" }, { 163, "£" }, { 164, "¤" }, { 165, "¥" }, |
---|
| 1256 | { 166, "¦" }, { 167, "§" }, { 168, "¨" }, { 169, "©" }, |
---|
| 1257 | { 170, "ª" }, { 171, "«" }, { 172, "¬" }, { 173, "­" }, |
---|
| 1258 | { 174, "®" }, { 175, "¯" }, { 176, "°" }, { 177, "±" }, |
---|
| 1259 | { 178, "²" }, { 179, "³" }, { 180, "´" }, { 181, "µ" }, |
---|
| 1260 | { 182, "¶" }, { 183, "·" }, { 184, "¸" }, { 185, "¹" }, |
---|
| 1261 | { 186, "º" }, { 187, "»" }, { 188, "¼" }, { 189, "½" }, |
---|
| 1262 | { 190, "¾" }, { 191, "¿" }, { 192, "À" }, { 193, "Á" }, |
---|
| 1263 | { 194, "Â" }, { 195, "Ã" }, { 196, "Ä" }, { 197, "Å" }, |
---|
| 1264 | { 198, "Æ" }, { 199, "Ç" }, { 200, "È" }, { 201, "É" }, |
---|
| 1265 | { 202, "Ê" }, { 203, "Ë" }, { 204, "Ì" }, { 205, "Í" }, |
---|
| 1266 | { 206, "Î" }, { 207, "Ï" }, { 208, "Ð" }, { 209, "Ñ" }, |
---|
| 1267 | { 210, "Ò" }, { 211, "Ó" }, { 212, "Ô" }, { 213, "Õ" }, |
---|
| 1268 | { 214, "Ö" }, { 215, "×" }, { 216, "Ø" }, { 217, "Ù" }, |
---|
| 1269 | { 218, "Ú" }, { 219, "Û" }, { 220, "Ü" }, { 221, "Ý" }, |
---|
| 1270 | { 222, "Þ" }, { 223, "ß" }, { 224, "à" }, { 225, "á" }, |
---|
| 1271 | { 226, "â" }, { 227, "ã" }, { 228, "ä" }, { 229, "å" }, |
---|
| 1272 | { 230, "æ" }, { 231, "ç" }, { 232, "è" }, { 233, "é" }, |
---|
| 1273 | { 234, "ê" }, { 235, "ë" }, { 236, "ì" }, { 237, "í" }, |
---|
| 1274 | { 238, "î" }, { 239, "ï" }, { 240, "ð" }, { 241, "ñ" }, |
---|
| 1275 | { 242, "ò" }, { 243, "ó" }, { 244, "ô" }, { 245, "õ" }, |
---|
| 1276 | { 246, "ö" }, { 247, "÷" }, { 248, "ø" }, { 249, "ù" }, |
---|
| 1277 | { 250, "ú" }, { 251, "û" }, { 252, "ü" }, { 253, "ý" }, |
---|
| 1278 | { 254, "þ" }, { 255, "ÿ" }, { 338, "Œ" }, { 339, "œ" }, |
---|
| 1279 | { 352, "Š" }, { 353, "š" }, { 376, "Ÿ" }, { 402, "ƒ" }, |
---|
| 1280 | { 710, "ˆ" }, { 732, "˜" }, { 913, "Α" }, { 914, "Β" }, |
---|
| 1281 | { 915, "Γ" }, { 916, "Δ" }, { 917, "Ε" }, { 918, "Ζ" }, |
---|
| 1282 | { 919, "Η" }, { 920, "Θ" }, { 921, "Ι" }, { 922, "Κ" }, |
---|
| 1283 | { 923, "Λ" }, { 924, "Μ" }, { 925, "Ν" }, { 926, "Ξ" }, |
---|
| 1284 | { 927, "Ο" }, { 928, "Π" }, { 929, "Ρ" }, { 931, "Σ" }, |
---|
| 1285 | { 932, "Τ" }, { 933, "Υ" }, { 934, "Φ" }, { 935, "Χ" }, |
---|
| 1286 | { 936, "Ψ" }, { 937, "Ω" }, { 945, "α" }, { 946, "β" }, |
---|
| 1287 | { 947, "γ" }, { 948, "δ" }, { 949, "ε" }, { 950, "ζ" }, |
---|
| 1288 | { 951, "η" }, { 952, "θ" }, { 953, "ι" }, { 954, "κ" }, |
---|
| 1289 | { 955, "λ" }, { 956, "μ" }, { 957, "ν" }, { 958, "ξ" }, |
---|
| 1290 | { 959, "ο" }, { 960, "π" }, { 961, "ρ" }, { 962, "ς" }, |
---|
| 1291 | { 963, "σ" }, { 964, "τ" }, { 965, "υ" }, { 966, "φ" }, |
---|
| 1292 | { 967, "χ" }, { 968, "ψ" }, { 969, "ω" }, { 977, "&thetasym" }, |
---|
| 1293 | { 978, "ϒ" }, { 982, "ϖ" }, {8194, " " }, {8195, " " }, |
---|
| 1294 | { 8201, " " }, {8204, "‌" }, {8205, "‍" }, {8206, "‎" }, |
---|
| 1295 | { 8207, "‏" }, {8211, "–" }, {8212, "—" }, {8216, "‘" }, |
---|
| 1296 | { 8217, "’" }, {8218, "‚" }, {8220, "“" }, {8221, "”" }, |
---|
| 1297 | { 8222, "„" }, {8224, "†" }, {8225, "‡" }, {8226, "•" }, |
---|
| 1298 | { 8230, "…" }, {8240, "‰" }, {8242, "′" }, {8243, "″" }, |
---|
| 1299 | { 8249, "‹" }, {8250, "›" }, {8254, "‾" }, {8260, "⁄" }, |
---|
| 1300 | { 8364, "€" }, {8465, "ℑ" }, {8472, "℘" }, {8476, "ℜ" }, |
---|
| 1301 | { 8482, "™" }, {8501, "ℵ" }, {8592, "←" }, {8593, "↑" }, |
---|
| 1302 | { 8594, "→" }, {8595, "↓" }, {8596, "↔" }, {8629, "↵" }, |
---|
| 1303 | { 8656, "⇐" }, {8657, "⇑" }, {8658, "⇒" }, {8659, "⇓" }, |
---|
| 1304 | { 8660, "⇔" }, {8704, "∀" }, {8706, "∂" }, {8707, "∃" }, |
---|
| 1305 | { 8709, "∅" }, {8711, "∇" }, {8712, "∈" }, {8713, "∉" }, |
---|
| 1306 | { 8715, "∋" }, {8719, "∏" }, {8721, "∑" }, {8722, "−" }, |
---|
| 1307 | { 8727, "∗" }, {8730, "√" }, {8733, "∝" }, {8734, "∞" }, |
---|
| 1308 | { 8736, "∠" }, {8743, "∧" }, {8744, "∨" }, {8745, "∩" }, |
---|
| 1309 | { 8746, "∪" }, {8747, "∫" }, {8756, "∴" }, {8764, "∼" }, |
---|
| 1310 | { 8773, "≅" }, {8776, "≈" }, {8800, "≠" }, {8801, "≡" }, |
---|
| 1311 | { 8804, "≤" }, {8805, "≥" }, {8834, "⊂" }, {8835, "⊃" }, |
---|
| 1312 | { 8836, "⊄" }, {8838, "⊆" }, {8839, "⊇" }, {8853, "⊕" }, |
---|
| 1313 | { 8855, "⊗" }, {8869, "⊥" }, {8901, "⋅" }, {8968, "⌈" }, |
---|
| 1314 | { 8969, "⌉" }, {8970, "⌊" }, {8971, "⌋" }, {9001, "⟨" }, |
---|
| 1315 | { 9002, "⟩" }, {9674, "◊" }, {9824, "♠" }, {9827, "♣" }, |
---|
| 1316 | { 9829, "♥" }, {9830, "♦" } |
---|
| 1317 | }; |
---|
| 1318 | int num_chars = sizeof(charset) / sizeof(charset[0]); |
---|
| 1319 | |
---|
| 1320 | static struct { |
---|
| 1321 | char *open_tag; |
---|
| 1322 | char *uri_tag; |
---|
| 1323 | } |
---|
| 1324 | uritag[] = { |
---|
| 1325 | { "<a", "href" }, { "<img", "src" }, { "<input", "src" }, |
---|
| 1326 | { "<iframe", "src" }, { "<frame", "src" }, { "<script", "src" }, |
---|
| 1327 | { "<form", "action" }, { "<embed", "src" }, { "<area", "href" }, |
---|
| 1328 | { "<base", "href" }, { "<link", "href" }, { "<source", "src" }, |
---|
| 1329 | { "<body", "background" }, { "<blockquote", "cite" }, { "<q", "cite" }, |
---|
| 1330 | { "<ins", "cite" }, { "<del", "cite" } |
---|
| 1331 | }; |
---|
| 1332 | int num_uri = sizeof(uritag) / sizeof(uritag[0]); |
---|
| 1333 | |
---|
| 1334 | size_t len = strlen(html); |
---|
| 1335 | html2 = malloc(len+1); |
---|
| 1336 | |
---|
| 1337 | if (html2 == NULL) { |
---|
| 1338 | LOG (LOG_CRIT, ERR_MEM_ALLOC); |
---|
| 1339 | return NULL; |
---|
| 1340 | } |
---|
| 1341 | |
---|
| 1342 | for (i = 0; i < len; i++) { |
---|
| 1343 | if (html[i] == '<') { |
---|
| 1344 | if (cdata_close_tag) { |
---|
| 1345 | if (strncasecmp(html + i, cdata_close_tag, strlen(cdata_close_tag)) == 0) { |
---|
| 1346 | i += strlen(cdata_close_tag) - 1; |
---|
| 1347 | cdata_close_tag = NULL; |
---|
| 1348 | } |
---|
| 1349 | continue; |
---|
| 1350 | } else if (strncasecmp(html + i, "</td>", 5) == 0) { |
---|
| 1351 | i += 4; |
---|
| 1352 | closing_td_tag = 1; |
---|
| 1353 | continue; |
---|
| 1354 | } else if (strncasecmp(html + i, "<td", 3) == 0 && closing_td_tag) { |
---|
| 1355 | if (j > 0 && !isspace(html2[j-1])) { |
---|
| 1356 | html2[j++]=' '; |
---|
| 1357 | } |
---|
| 1358 | visible = 0; |
---|
| 1359 | } else { |
---|
| 1360 | closing_td_tag = 0; |
---|
| 1361 | visible = 1; |
---|
| 1362 | } |
---|
| 1363 | k = i + 1; |
---|
| 1364 | |
---|
| 1365 | if ((k < len) && (!( (html[k] >= 65 && html[k] <= 90) || |
---|
| 1366 | (html[k] >= 97 && html[k] <= 122) || |
---|
| 1367 | (html[k] == 47) || |
---|
| 1368 | (html[k] == 33) ))) { |
---|
| 1369 | /* Not a HTML tag. HTML tags start with a letter, forwardslash or exclamation mark */ |
---|
| 1370 | visible = 1; |
---|
| 1371 | html2[j++]=html[i]; |
---|
| 1372 | i = k; |
---|
| 1373 | const char *w = &(html[k]); |
---|
| 1374 | while (j < len && (size_t)(w - html) < len && *w != '<') { |
---|
| 1375 | html2[j++]=*w; |
---|
| 1376 | w++; |
---|
| 1377 | i++; |
---|
| 1378 | } |
---|
| 1379 | continue; |
---|
| 1380 | } else if (html[k]) { |
---|
| 1381 | /* find the end of the tag */ |
---|
| 1382 | while (k < len && html[k] != '<' && html[k] != '>') {k++;} |
---|
| 1383 | |
---|
| 1384 | /* if we've got a tag with a uri, save the address to print later. */ |
---|
| 1385 | char *url_tag = " "; |
---|
| 1386 | int tag_offset = 0, x = 0, y = 0; |
---|
| 1387 | for (y = 0; y < num_uri; y++) { |
---|
| 1388 | x = strlen(uritag[y].open_tag); |
---|
| 1389 | if (strncasecmp(html+i,uritag[y].open_tag,x)==0 && (i+x < len && isspace(html[i+x]))) { |
---|
| 1390 | url_tag = uritag[y].uri_tag; |
---|
| 1391 | tag_offset = i + x + 1; |
---|
| 1392 | break; |
---|
| 1393 | } |
---|
| 1394 | } |
---|
| 1395 | /* tag with uri found */ |
---|
| 1396 | if (tag_offset > 0) { |
---|
| 1397 | size_t url_start; /* start of url tag inclusive [ */ |
---|
| 1398 | size_t url_tag_len = strlen(url_tag); |
---|
| 1399 | char delim = ' '; |
---|
| 1400 | /* find start of uri */ |
---|
| 1401 | for (url_start = tag_offset; url_start <= k; url_start++) { |
---|
| 1402 | if (strncasecmp(html + url_start, url_tag, url_tag_len) == 0) { |
---|
| 1403 | url_start += url_tag_len; |
---|
| 1404 | while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces before = */ |
---|
| 1405 | if (html[url_start] == '=') { |
---|
| 1406 | url_start++; |
---|
| 1407 | while (html[url_start] && isspace(html[url_start])) {url_start++;} /* remove spaces after = */ |
---|
| 1408 | if (html[url_start] == '"') { |
---|
| 1409 | delim = '"'; |
---|
| 1410 | url_start++; |
---|
| 1411 | } else if (html[url_start] == '\'') { |
---|
| 1412 | delim = '\''; |
---|
| 1413 | url_start++; |
---|
| 1414 | } else { |
---|
| 1415 | delim = '>'; |
---|
| 1416 | } |
---|
| 1417 | break; |
---|
| 1418 | } else { |
---|
| 1419 | /* Start of uri tag found but no '=' after the tag. |
---|
| 1420 | * Skip the whole tag. |
---|
| 1421 | */ |
---|
| 1422 | break; |
---|
| 1423 | } |
---|
| 1424 | } else if ((url_start - tag_offset) >= 50) { |
---|
| 1425 | /* The length of the html tag is over 50 characters long without |
---|
| 1426 | * finding the start of the url/uri. Skip the whole tag. |
---|
| 1427 | */ |
---|
| 1428 | break; |
---|
| 1429 | } |
---|
| 1430 | } |
---|
| 1431 | /* find end of uri */ |
---|
| 1432 | if (delim != ' ') { |
---|
| 1433 | if (url_start < len && |
---|
| 1434 | (strncasecmp(html + url_start, "http:", 5) == 0 || |
---|
| 1435 | strncasecmp(html + url_start, "https:", 6) == 0 || |
---|
| 1436 | strncasecmp(html + url_start, "ftp:", 4) == 0)) { |
---|
| 1437 | html2[j++]=' '; |
---|
| 1438 | const char *w = &(html[url_start]); |
---|
| 1439 | /* html2 is a buffer of len + 1, where the +1 is for NULL |
---|
| 1440 | * termination. This means we only want to loop to len |
---|
| 1441 | * since we will replace html2[j] right after the loop. |
---|
| 1442 | */ |
---|
| 1443 | while (j < len && (size_t)(w - html) < len && *w != delim) { |
---|
| 1444 | html2[j++]=*w; |
---|
| 1445 | w++; |
---|
| 1446 | } |
---|
| 1447 | html2[j++]=' '; |
---|
| 1448 | } |
---|
| 1449 | } |
---|
| 1450 | } else if (strncasecmp(html + i, "<p>", 3) == 0 |
---|
| 1451 | || strncasecmp(html + i, "<p ", 3) == 0 |
---|
| 1452 | || strncasecmp(html + i, "<p\t", 3) == 0 |
---|
| 1453 | || strncasecmp(html + i, "<tr", 3) == 0 |
---|
| 1454 | || strncasecmp(html + i, "<option", 7) == 0 |
---|
| 1455 | || strncasecmp(html + i, "<br", 3) == 0 |
---|
| 1456 | || strncasecmp(html + i, "<li", 3) == 0 |
---|
| 1457 | || strncasecmp(html + i, "<div", 4) == 0 |
---|
| 1458 | || strncasecmp(html + i, "</select>", 9) == 0 |
---|
| 1459 | || strncasecmp(html + i, "</table>", 8) == 0) { |
---|
| 1460 | if (j > 0 && html2[j-1] != '\n' && html2[j-1] != '\r') { |
---|
| 1461 | html2[j++] = '\n'; |
---|
| 1462 | } |
---|
| 1463 | } else if (strncasecmp(html + i, "<applet", 7) == 0) { |
---|
| 1464 | cdata_close_tag = "</applet>"; |
---|
| 1465 | } else if (strncasecmp(html + i, "<embed", 6) == 0) { |
---|
| 1466 | cdata_close_tag = "</embed>"; |
---|
| 1467 | } else if (strncasecmp(html + i, "<frameset", 9) == 0) { |
---|
| 1468 | cdata_close_tag = "</frameset>"; |
---|
| 1469 | } else if (strncasecmp(html + i, "<frame", 6) == 0) { |
---|
| 1470 | cdata_close_tag = "</frame>"; |
---|
| 1471 | } else if (strncasecmp(html + i, "<iframe", 7) == 0) { |
---|
| 1472 | cdata_close_tag = "</iframe>"; |
---|
| 1473 | } else if (strncasecmp(html + i, "<noembed", 8) == 0) { |
---|
| 1474 | cdata_close_tag = "</noembed>"; |
---|
| 1475 | } else if (strncasecmp(html + i, "<noscript", 9) == 0) { |
---|
| 1476 | cdata_close_tag = "</noscript>"; |
---|
| 1477 | } else if (strncasecmp(html + i, "<object", 7) == 0) { |
---|
| 1478 | cdata_close_tag = "</object>"; |
---|
| 1479 | } else if (strncasecmp(html + i, "<script", 7) == 0) { |
---|
| 1480 | cdata_close_tag = "</script>"; |
---|
| 1481 | } else if (strncasecmp(html + i, "<style", 6) == 0) { |
---|
| 1482 | cdata_close_tag = "</style>"; |
---|
| 1483 | } |
---|
| 1484 | i = (html[k] == '<' || html[k] == '\0')? k - 1: k; |
---|
| 1485 | continue; |
---|
| 1486 | } |
---|
| 1487 | } else if (cdata_close_tag) { |
---|
| 1488 | continue; |
---|
| 1489 | } else if (!isspace(html[i])) { |
---|
| 1490 | visible = 1; |
---|
| 1491 | } |
---|
| 1492 | |
---|
| 1493 | if (strncmp(html+i,"&#",2)==0) { |
---|
| 1494 | int x = 0; |
---|
| 1495 | const char *w = &(html[i+2]); |
---|
| 1496 | while (*w == '0') {i++;w++;} |
---|
| 1497 | char n[5]; |
---|
| 1498 | if (html[i+4] && html[i+4] == ';' |
---|
| 1499 | && isdigit(html[i+2]) |
---|
| 1500 | && isdigit(html[i+3])) { |
---|
| 1501 | n[0] = html[i+2]; |
---|
| 1502 | n[1] = html[i+3]; |
---|
| 1503 | n[2] = 0; |
---|
| 1504 | x = atoi(n); |
---|
| 1505 | if (x <= 255 && x >= 32) |
---|
| 1506 | html2[j++] = x; |
---|
| 1507 | i += 4; |
---|
| 1508 | } else if (html[i+6] |
---|
| 1509 | && html[i+6] == ';' |
---|
| 1510 | && isdigit(html[i+2]) |
---|
| 1511 | && isdigit(html[i+3]) |
---|
| 1512 | && isdigit(html[i+4]) |
---|
| 1513 | && isdigit(html[i+5])) { |
---|
| 1514 | n[0] = html[i+2]; |
---|
| 1515 | n[1] = html[i+3]; |
---|
| 1516 | n[2] = html[i+4]; |
---|
| 1517 | n[3] = html[i+5]; |
---|
| 1518 | n[4] = 0; |
---|
| 1519 | x = atoi(n); |
---|
| 1520 | if (x <= 255 && x >= 32) |
---|
| 1521 | html2[j++] = x; |
---|
| 1522 | i += 6; |
---|
| 1523 | } else { |
---|
| 1524 | const char *w = &(html[i]); |
---|
| 1525 | while (*w != ';' && *w != ' ' && *w != '\t' && *w != '\0') {i++;w++;} |
---|
| 1526 | } |
---|
| 1527 | visible = 0; |
---|
| 1528 | continue; |
---|
| 1529 | } else if (html[i] == '&') { |
---|
| 1530 | int x = 0, y = 0; |
---|
| 1531 | for (y = 0; y < num_chars; y++) { |
---|
| 1532 | x = strlen(charset[y].entity); |
---|
| 1533 | if (strncasecmp(html+i,charset[y].entity,x)==0) { |
---|
| 1534 | if (charset[y].id <= 255) |
---|
| 1535 | html2[j++] = charset[y].id; |
---|
| 1536 | i += x-1; |
---|
| 1537 | visible = 0; |
---|
| 1538 | continue; |
---|
| 1539 | } |
---|
| 1540 | } |
---|
| 1541 | } |
---|
| 1542 | |
---|
| 1543 | if (j < len && visible) |
---|
| 1544 | html2[j++] = html[i]; |
---|
| 1545 | |
---|
| 1546 | if (j >= len) |
---|
| 1547 | i = j = len; |
---|
| 1548 | } |
---|
| 1549 | |
---|
| 1550 | html2[j] = '\0'; |
---|
| 1551 | return (char *)html2; |
---|
| 1552 | } |
---|