ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/uulib/uuscan.c
(Generate patch)

Comparing Convert-UUlib/uulib/uuscan.c (file contents):
Revision 1.3.2.3 by root, Sun Oct 13 13:03:09 2002 UTC vs.
Revision 1.3.2.4 by root, Thu Nov 6 13:08:24 2003 UTC

55#include <uudeview.h> 55#include <uudeview.h>
56#include <uuint.h> 56#include <uuint.h>
57#include <fptools.h> 57#include <fptools.h>
58#include <uustring.h> 58#include <uustring.h>
59 59
60char * uuscan_id = "$Id: uuscan.c,v 1.3.2.3 2002/10/13 13:03:09 root Exp $"; 60char * uuscan_id = "$Id: uuscan.c,v 1.3.2.4 2003/11/06 13:08:24 root Exp $";
61 61
62/* 62/*
63 * Header fields we recognize as such. See RFC822. We add "From ", 63 * Header fields we recognize as such. See RFC822. We add "From ",
64 * the usual marker for a beginning of a new message, and a couple 64 * the usual marker for a beginning of a new message, and a couple
65 * of usual MDA, News and MIME headers. 65 * of usual MDA, News and MIME headers.
117/* 117/*
118 * mallocable areas 118 * mallocable areas
119 */ 119 */
120 120
121char *uuscan_shlline; 121char *uuscan_shlline;
122char *uuscan_shlline2;
122char *uuscan_pvvalue; 123char *uuscan_pvvalue;
123char *uuscan_phtext; 124char *uuscan_phtext;
124char *uuscan_sdline; 125char *uuscan_sdline;
125char *uuscan_sdbhds1; 126char *uuscan_sdbhds1;
126char *uuscan_sdbhds2; 127char *uuscan_sdbhds2;
140IsLineEmpty (char *data) 141IsLineEmpty (char *data)
141{ 142{
142 if (data == NULL) return 0; 143 if (data == NULL) return 0;
143 while (*data && isspace (*data)) data++; 144 while (*data && isspace (*data)) data++;
144 return ((*data)?0:1); 145 return ((*data)?0:1);
146}
147
148/*
149 * Is this a header line? A header line has alphanumeric characters
150 * followed by a colon.
151 */
152
153static int
154IsHeaderLine (char *data)
155{
156 if (data == NULL) return 0;
157 if (*data == ':') return 0;
158 while (*data && isalnum (*data)) data++;
159 return (*data == ':') ? 1 : 0;
145} 160}
146 161
147/* 162/*
148 * Scans a potentially folded header line from the input file. If 163 * Scans a potentially folded header line from the input file. If
149 * initial is non-NULL, it is the first line of the header, useful 164 * initial is non-NULL, it is the first line of the header, useful
155 170
156static char * 171static char *
157ScanHeaderLine (FILE *datei, char *initial) 172ScanHeaderLine (FILE *datei, char *initial)
158{ 173{
159 char *ptr=uuscan_shlline; 174 char *ptr=uuscan_shlline;
175 char *ptr2, *p1, *p2, *p3;
160 int llength, c; 176 int llength, c;
161 long curpos; 177 long curpos;
162 int hadcr; 178 int hadcr;
163 179
164 if (initial) { 180 if (initial) {
249 llength += c; 265 llength += c;
250 while (llength && isspace(*(ptr-1))) { 266 while (llength && isspace(*(ptr-1))) {
251 ptr--; llength--; 267 ptr--; llength--;
252 } 268 }
253 } 269 }
270
254 *ptr = '\0'; 271 *ptr = '\0';
255 272
256 if (llength == 0) 273 if (llength == 0)
257 return NULL; 274 return NULL;
258 275
276 /*
277 * Now that we've read the header line, we can RFC 1522-decode it
278 */
279
280 ptr = uuscan_shlline;
281 ptr2 = uuscan_shlline2;
282
283 while (*ptr) {
284 /*
285 * Look for =? magic
286 */
287
288 if (*ptr == '=' && *(ptr+1) == '?') {
289 /*
290 * Let p1 point to the charset, look for next question mark
291 */
292
293 p1 = p2 = ptr+2;
294
295 while (*p2 && *p2 != '?') {
296 p2++;
297 }
298
299 if (*p2 == '?' &&
300 (*(p2+1) == 'q' || *(p2+1) == 'Q' ||
301 *(p2+1) == 'b' || *(p2+1) == 'B') &&
302 *(p2+2) == '?') {
303 /*
304 * Let p2 point to the encoding, look for ?= magic
305 */
306
307 p2++;
308 p3=p2+2;
309
310 while (*p3 && (*p3 != '?' || *(p3+1) != '=')) {
311 p3++;
312 }
313
314 if (*p3 == '?' && *(p3+1) == '=') {
315 /*
316 * Alright, we've found an RFC 1522 header field
317 */
318 if (*p2 == 'q' || *p2 == 'Q') {
319 c = UUDecodeField (p2+2, ptr2, QP_ENCODED);
320 }
321 else if (*p2 == 'b' || *p2 == 'B') {
322 c = UUDecodeField (p2+2, ptr2, B64ENCODED);
323 }
324 if (c >= 0) {
325 ptr2 += c;
326 ptr = p3+2;
327 continue;
328 }
329 }
330 }
331 }
332
333 *ptr2++ = *ptr++;
334 }
335
336 *ptr2 = 0;
337
259 return uuscan_shlline; 338 return uuscan_shlline2;
260} 339}
261 340
262/* 341/*
263 * Extract the value from a MIME attribute=value pair. This function 342 * Extract the value from a MIME attribute=value pair. This function
264 * receives a pointer to the attribute. 343 * receives a pointer to the attribute.
288 367
289 if (*attribute == '"') { 368 if (*attribute == '"') {
290 /* quoted-string */ 369 /* quoted-string */
291 attribute++; 370 attribute++;
292 while (*attribute && *attribute != '"' && length < 255) { 371 while (*attribute && *attribute != '"' && length < 255) {
293 if (*attribute == '\\')
294 *ptr++ = *++attribute;
295 else
296 *ptr++ = *attribute; 372 *ptr++ = *attribute++;
297 attribute++;
298 length++; 373 length++;
299 } 374 }
300 *ptr = '\0'; 375 *ptr = '\0';
301 } 376 }
302 else { 377 else {
303 /* tspecials from RFC1521 */ 378 /* tspecials from RFC1521 */
379 /*
380 * Note - exclude '[', ']' and ';' on popular request; these are
381 * used in some Content-Type fields by the Klez virus, and people
382 * who feed their virus scanners with the output of UUDeview would
383 * like to catch it!
384 */
304 385
305 while (*attribute && !isspace (*attribute) && 386 while (*attribute && !isspace (*attribute) &&
306 *attribute != '(' && *attribute != ')' && 387 *attribute != '(' && *attribute != ')' &&
307 *attribute != '<' && *attribute != '>' && 388 *attribute != '<' && *attribute != '>' &&
308 *attribute != '@' && *attribute != ',' && 389 *attribute != '@' && *attribute != ',' &&
309 *attribute != ';' && *attribute != ':' && 390 /* *attribute != ';' && */ *attribute != ':' &&
310 *attribute != '\\' &&*attribute != '"' && 391 *attribute != '\\' &&*attribute != '"' &&
311 *attribute != '/' && *attribute != '[' && 392 *attribute != '/' && /* *attribute != '[' &&
312 *attribute != ']' && *attribute != '?' && 393 *attribute != ']' && */ *attribute != '?' &&
313 *attribute != '=' && length < 255) 394 *attribute != '=' && length < 255)
314 *ptr++ = *attribute++; 395 *ptr++ = *attribute++;
315 396
316 *ptr = '\0'; 397 *ptr = '\0';
317 } 398 }
514 char *boundary, int ismime, int checkheaders, 595 char *boundary, int ismime, int checkheaders,
515 fileread *result) 596 fileread *result)
516{ 597{
517 char *line=uuscan_sdline, *bhds1=uuscan_sdbhds1, *bhds2=uuscan_sdbhds2; 598 char *line=uuscan_sdline, *bhds1=uuscan_sdbhds1, *bhds2=uuscan_sdbhds2;
518 static char *ptr, *p2, *p3=NULL, *bhdsp, bhl; 599 static char *ptr, *p2, *p3=NULL, *bhdsp, bhl;
519 int isb64[10], isuue[10], isxxe[10], isbhx[10], iscnt; 600 int islen[10], isb64[10], isuue[10], isxxe[10], isbhx[10], iscnt;
520 int cbb64, cbuue, cbxxe, cbbhx; 601 int cbb64, cbuue, cbxxe, cbbhx;
521 int bhflag=0, vflag, haddh=0, hadct=0; 602 int bhflag=0, vflag, haddh=0, hadct=0;
522 int bhrpc=0, bhnf=0, c, hcount, lcount, blen=0; 603 int bhrpc=0, bhnf=0, c, hcount, lcount, blen=0;
523 int encoding=0, dflag=0, ctline=42; 604 int encoding=0, dflag=0, ctline=42;
524 int dontcare=0, hadnl=0; 605 int dontcare=0, hadnl=0;
536 result->startpos = ftell (datei); 617 result->startpos = ftell (datei);
537 hcount = lcount = 0; 618 hcount = lcount = 0;
538 619
539 for (iscnt=0; iscnt<10; iscnt++) { 620 for (iscnt=0; iscnt<10; iscnt++) {
540 isb64[iscnt] = isuue[iscnt] = isxxe[iscnt] = isbhx[iscnt] = 0; 621 isb64[iscnt] = isuue[iscnt] = isxxe[iscnt] = isbhx[iscnt] = 0;
622 islen[iscnt] = -1;
541 } 623 }
542 624
543 iscnt = 0; 625 iscnt = 0;
544 626
545 if (boundary) 627 if (boundary)
853 * Detection for yEnc encoding 935 * Detection for yEnc encoding
854 */ 936 */
855 937
856 if (strncmp (line, "=ybegin ", 8) == 0 && 938 if (strncmp (line, "=ybegin ", 8) == 0 &&
857 _FP_strstr (line, " name=") != NULL) { 939 _FP_strstr (line, " name=") != NULL) {
858 if ((result->begin || result->end) && !uu_more_mime) { 940 if ((result->begin || result->end || result->uudet) && !uu_more_mime) {
859 fseek (datei, oldposition, SEEK_SET); 941 fseek (datei, oldposition, SEEK_SET);
860 break; 942 break;
861 } 943 }
862 944
863 /* 945 /*
926 if (strncmp (line, "=yend ", 6) == 0 && 1008 if (strncmp (line, "=yend ", 6) == 0 &&
927 result->uudet == YENC_ENCODED) { 1009 result->uudet == YENC_ENCODED) {
928 if (yepartends == 0 || yepartends >= yefilesize) { 1010 if (yepartends == 0 || yepartends >= yefilesize) {
929 result->end = 1; 1011 result->end = 1;
930 } 1012 }
1013#if 0
931 if (!uu_more_mime) 1014 if (!uu_more_mime)
932 break; 1015 break;
1016#endif
1017 continue;
933 } 1018 }
934 1019
935 /* 1020 /*
936 * if we haven't yet found anything encoded, try to find something 1021 * if we haven't yet found anything encoded, try to find something
937 */ 1022 */
946 1031
947 /* 1032 /*
948 * Check data against all possible encodings 1033 * Check data against all possible encodings
949 */ 1034 */
950 1035
1036 islen[iscnt%10] = strlen(line);
951 isb64[iscnt%10] = (UUValidData (line, B64ENCODED, &bhflag)==B64ENCODED); 1037 isb64[iscnt%10] = (UUValidData (line, B64ENCODED, &bhflag)==B64ENCODED);
952 isuue[iscnt%10] = (UUValidData (line, UU_ENCODED, &bhflag)==UU_ENCODED); 1038 isuue[iscnt%10] = (UUValidData (line, UU_ENCODED, &bhflag)==UU_ENCODED);
953 isxxe[iscnt%10] = (UUValidData (line, XX_ENCODED, &bhflag)==XX_ENCODED); 1039 isxxe[iscnt%10] = (UUValidData (line, XX_ENCODED, &bhflag)==XX_ENCODED);
954 isbhx[iscnt%10] = (UUValidData (line, BH_ENCODED, &bhflag)==BH_ENCODED); 1040 isbhx[iscnt%10] = (UUValidData (line, BH_ENCODED, &bhflag)==BH_ENCODED);
955 1041
1030 * We accept an encoding if it has been true for four consecutive 1116 * We accept an encoding if it has been true for four consecutive
1031 * lines. Check the is<enc> arrays to avoid mistaking one encoding 1117 * lines. Check the is<enc> arrays to avoid mistaking one encoding
1032 * for the other. Uuencoded data is rather easily mistaken for 1118 * for the other. Uuencoded data is rather easily mistaken for
1033 * Base 64. If the data matches more than one encoding, we need to 1119 * Base 64. If the data matches more than one encoding, we need to
1034 * scan further. 1120 * scan further.
1121 *
1122 * Since text can also rather easily be mistaken for UUencoded
1123 * data if it just happens to have 4 lines in a row that have the
1124 * correct first character for the length of the line, we also add
1125 * a check that the first 3 lines must be the same length, and the
1126 * 4th line must be less than or equal to that length. (since
1127 * uuencoders use the same length for all lines except the last,
1128 * this shouldn't increase the minimum size of UUdata we can
1129 * detect, as it would if we tested all 4 lines for being the same
1130 * length.) - Matthew Mueller, 20030109
1035 */ 1131 */
1036 1132
1037 if (iscnt > 3) { 1133 if (iscnt > 3) {
1038 cbb64 = (isb64[(iscnt-1)%10] && isb64[(iscnt-2)%10] && 1134 cbb64 = (isb64[(iscnt-1)%10] && isb64[(iscnt-2)%10] &&
1039 isb64[(iscnt-3)%10] && isb64[(iscnt-4)%10]); 1135 isb64[(iscnt-3)%10] && isb64[(iscnt-4)%10]);
1040 cbuue = (isuue[(iscnt-1)%10] && isuue[(iscnt-2)%10] && 1136 cbuue = (isuue[(iscnt-1)%10] && isuue[(iscnt-2)%10] &&
1041 isuue[(iscnt-3)%10] && isuue[(iscnt-4)%10]); 1137 isuue[(iscnt-3)%10] && isuue[(iscnt-4)%10] &&
1138 islen[(iscnt-1)%10] <= islen[(iscnt-2)%10] &&
1139 islen[(iscnt-2)%10] == islen[(iscnt-3)%10] &&
1140 islen[(iscnt-3)%10] == islen[(iscnt-4)%10]);
1042 cbxxe = (isxxe[(iscnt-1)%10] && isxxe[(iscnt-2)%10] && 1141 cbxxe = (isxxe[(iscnt-1)%10] && isxxe[(iscnt-2)%10] &&
1043 isxxe[(iscnt-3)%10] && isxxe[(iscnt-4)%10]); 1142 isxxe[(iscnt-3)%10] && isxxe[(iscnt-4)%10] &&
1143 islen[(iscnt-1)%10] <= islen[(iscnt-2)%10] &&
1144 islen[(iscnt-2)%10] == islen[(iscnt-3)%10] &&
1145 islen[(iscnt-3)%10] == islen[(iscnt-4)%10]);
1044 cbbhx = (isbhx[(iscnt-1)%10] && isbhx[(iscnt-2)%10] && 1146 cbbhx = (isbhx[(iscnt-1)%10] && isbhx[(iscnt-2)%10] &&
1045 isbhx[(iscnt-3)%10] && isbhx[(iscnt-4)%10]); 1147 isbhx[(iscnt-3)%10] && isbhx[(iscnt-4)%10]);
1046 } 1148 }
1047 else { 1149 else {
1048 cbb64 = cbuue = cbxxe = cbbhx = 0; 1150 cbb64 = cbuue = cbxxe = cbbhx = 0;
1400 preheaders = ftell (datei); 1502 preheaders = ftell (datei);
1401 while (!feof (datei)) { 1503 while (!feof (datei)) {
1402 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL(); 1504 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL();
1403 if (_FP_fgets (line, 255, datei) == NULL) 1505 if (_FP_fgets (line, 255, datei) == NULL)
1404 break; 1506 break;
1507 line[255] = '\0';
1405 if (!IsLineEmpty (line)) { 1508 if (!IsLineEmpty (line)) {
1406 fseek (datei, preheaders, SEEK_SET); 1509 fseek (datei, preheaders, SEEK_SET);
1407 line[255] = '\0';
1408 break; 1510 break;
1409 } 1511 }
1410 preheaders = ftell (datei); 1512 preheaders = ftell (datei);
1411 } 1513 }
1412 } 1514 }
1435 mssdepth--; 1537 mssdepth--;
1436 UUkillheaders (&(multistack[mssdepth].envelope)); 1538 UUkillheaders (&(multistack[mssdepth].envelope));
1437 _FP_free (multistack[mssdepth].source); 1539 _FP_free (multistack[mssdepth].source);
1438 } 1540 }
1439 1541
1542 prevpos = ftell (datei);
1440 if (_FP_fgets (line, 255, datei) == NULL) { 1543 if (_FP_fgets (line, 255, datei) == NULL) {
1441 _FP_free (result); 1544 _FP_free (result);
1442 return NULL; 1545 return NULL;
1443 } 1546 }
1444 line[255] = '\0'; 1547 line[255] = '\0';
1474 1577
1475 while (!feof (datei) && !IsLineEmpty (line)) { 1578 while (!feof (datei) && !IsLineEmpty (line)) {
1476 if (IsKnownHeader (line)) 1579 if (IsKnownHeader (line))
1477 hcount++; 1580 hcount++;
1478 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL(); 1581 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL();
1582 if (IsHeaderLine (line)) {
1479 ptr1 = ScanHeaderLine (datei, line); 1583 ptr1 = ScanHeaderLine (datei, line);
1480 if (ParseHeader (&sstate.envelope, ptr1) == NULL) { 1584 if (ParseHeader (&sstate.envelope, ptr1) == NULL) {
1481 *errcode = UURET_NOMEM; 1585 *errcode = UURET_NOMEM;
1482 _FP_free (result); 1586 _FP_free (result);
1483 return NULL; 1587 return NULL;
1588 }
1484 } 1589 }
1485 /* 1590 /*
1486 * if we've read too many lines without finding headers, then 1591 * if we've read too many lines without finding headers, then
1487 * this probably isn't a mail folder after all 1592 * this probably isn't a mail folder after all
1488 */ 1593 */
1489 lcount++; 1594 lcount++;
1490 if (lcount > WAITHEADER && hcount < hlcount.afternl) 1595 if (lcount > WAITHEADER && hcount < hlcount.afternl) {
1596 fseek (datei, prevpos, SEEK_SET);
1597 line[0] = '\0';
1491 break; 1598 break;
1599 }
1492 1600
1493 if (_FP_fgets (line, 255, datei) == NULL) 1601 if (_FP_fgets (line, 255, datei) == NULL)
1494 break; 1602 break;
1495 line[255] = '\0'; 1603 line[255] = '\0';
1496 } 1604 }
1605
1497 /* skip empty lines */ 1606 /* skip empty lines */
1498 prevpos = ftell (datei); 1607 prevpos = ftell (datei);
1608 if (IsLineEmpty (line)) {
1499 while (!feof (datei)) { 1609 while (!feof (datei)) {
1500 if (_FP_fgets (line, 255, datei) == NULL) 1610 if (_FP_fgets (line, 255, datei) == NULL)
1501 break; 1611 break;
1502 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL(); 1612 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL();
1503 if (!IsLineEmpty (line)) { 1613 if (!IsLineEmpty (line)) {
1504 fseek (datei, prevpos, SEEK_SET); 1614 fseek (datei, prevpos, SEEK_SET);
1505 line[255] = '\0'; 1615 line[255] = '\0';
1506 break; 1616 break;
1507 } 1617 }
1508 prevpos = ftell (datei); 1618 prevpos = ftell (datei);
1509 } 1619 }
1620 }
1621
1510 /* 1622 /*
1511 * If we don't have all valid MIME headers yet, but the following 1623 * If we don't have all valid MIME headers yet, but the following
1512 * line is a MIME header, accept it anyway. 1624 * line is a MIME header, accept it anyway.
1513 */ 1625 */
1514 1626
1572 sstate.envelope.fname && sstate.envelope.ctenc) { 1684 sstate.envelope.fname && sstate.envelope.ctenc) {
1573 sstate.envelope.mimevers = _FP_strdup ("1.0"); 1685 sstate.envelope.mimevers = _FP_strdup ("1.0");
1574 hcount = hlcount.afternl; 1686 hcount = hlcount.afternl;
1575 } 1687 }
1576 1688
1577 if (hcount < hlcount.afternl) {
1578 /* not a folder after all */
1579 fseek (datei, preheaders, SEEK_SET);
1580 sstate.isfolder = 0;
1581 sstate.ismime = 0;
1582 }
1583 else if (sstate.envelope.mimevers != NULL) { 1689 if (sstate.envelope.mimevers != NULL) {
1584 /* this is a MIME file. check the Content-Type */ 1690 /* this is a MIME file. check the Content-Type */
1585 sstate.ismime = 1; 1691 sstate.ismime = 1;
1586 if (_FP_stristr (sstate.envelope.ctype, "multipart") != NULL) { 1692 if (_FP_stristr (sstate.envelope.ctype, "multipart") != NULL) {
1587 if (sstate.envelope.boundary == NULL) { 1693 if (sstate.envelope.boundary == NULL) {
1588 UUMessage (uuscan_id, __LINE__, UUMSG_WARNING, 1694 UUMessage (uuscan_id, __LINE__, UUMSG_WARNING,
1596 } 1702 }
1597 } 1703 }
1598 else { 1704 else {
1599 sstate.mimestate = MS_BODY; /* just a `simple' message */ 1705 sstate.mimestate = MS_BODY; /* just a `simple' message */
1600 } 1706 }
1707 }
1708 else {
1709 /* not a folder after all */
1710 fseek (datei, prevpos, SEEK_SET);
1711 sstate.isfolder = 0;
1712 sstate.ismime = 0;
1601 } 1713 }
1602 } 1714 }
1603 1715
1604 if (feof (datei) || ferror (datei)) { /* oops */ 1716 if (feof (datei) || ferror (datei)) { /* oops */
1605 _FP_free (result); 1717 _FP_free (result);

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines