ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/uulib/uuscan.c
(Generate patch)

Comparing Convert-UUlib/uulib/uuscan.c (file contents):
Revision 1.6 by root, Sun Oct 13 13:08:44 2002 UTC vs.
Revision 1.7 by root, Thu Nov 6 13:33:41 2003 UTC

55#include <uudeview.h> 55#include <uudeview.h>
56#include <uuint.h> 56#include <uuint.h>
57#include <fptools.h> 57#include <fptools.h>
58#include <uustring.h> 58#include <uustring.h>
59 59
60char * uuscan_id = "$Id: uuscan.c,v 1.6 2002/10/13 13:08:44 root Exp $"; 60char * uuscan_id = "$Id: uuscan.c,v 1.7 2003/11/06 13:33:41 root Exp $";
61 61
62/* 62/*
63 * Header fields we recognize as such. See RFC822. We add "From ", 63 * Header fields we recognize as such. See RFC822. We add "From ",
64 * the usual marker for a beginning of a new message, and a couple 64 * the usual marker for a beginning of a new message, and a couple
65 * of usual MDA, News and MIME headers. 65 * of usual MDA, News and MIME headers.
117/* 117/*
118 * mallocable areas 118 * mallocable areas
119 */ 119 */
120 120
121char *uuscan_shlline; 121char *uuscan_shlline;
122char *uuscan_shlline2;
122char *uuscan_pvvalue; 123char *uuscan_pvvalue;
123char *uuscan_phtext; 124char *uuscan_phtext;
124char *uuscan_sdline; 125char *uuscan_sdline;
125char *uuscan_sdbhds1; 126char *uuscan_sdbhds1;
126char *uuscan_sdbhds2; 127char *uuscan_sdbhds2;
140IsLineEmpty (char *data) 141IsLineEmpty (char *data)
141{ 142{
142 if (data == NULL) return 0; 143 if (data == NULL) return 0;
143 while (*data && isspace (*data)) data++; 144 while (*data && isspace (*data)) data++;
144 return ((*data)?0:1); 145 return ((*data)?0:1);
146}
147
/*
 * Is this a header line? A header line consists of a field name made
 * up of alphanumeric characters and hyphens (e.g. "Content-Type"),
 * immediately followed by a colon.
 *
 * Returns 1 if data starts with such a field name and colon, and 0
 * otherwise (including when data is NULL, empty, or begins with a
 * colon).
 */

static int
IsHeaderLine (char *data)
{
  if (data == NULL) return 0;
  if (*data == ':') return 0;
  /*
   * Hyphens must be accepted, or common fields such as Content-Type
   * and MIME-Version would be rejected. The cast keeps isalnum()
   * well-defined for bytes >= 0x80 where plain char is signed.
   */
  while (*data && (isalnum ((unsigned char) *data) || *data == '-'))
    data++;
  return (*data == ':') ? 1 : 0;
}
146 161
147/* 162/*
148 * Scans a potentially folded header line from the input file. If 163 * Scans a potentially folded header line from the input file. If
149 * initial is non-NULL, it is the first line of the header, useful 164 * initial is non-NULL, it is the first line of the header, useful
155 170
156static char * 171static char *
157ScanHeaderLine (FILE *datei, char *initial) 172ScanHeaderLine (FILE *datei, char *initial)
158{ 173{
159 char *ptr=uuscan_shlline; 174 char *ptr=uuscan_shlline;
175 char *ptr2, *p1, *p2, *p3;
160 int llength, c; 176 int llength, c;
161 long curpos; 177 long curpos;
162 int hadcr; 178 int hadcr;
163 179
164 if (initial) { 180 if (initial) {
249 llength += c; 265 llength += c;
250 while (llength && isspace(*(ptr-1))) { 266 while (llength && isspace(*(ptr-1))) {
251 ptr--; llength--; 267 ptr--; llength--;
252 } 268 }
253 } 269 }
270
254 *ptr = '\0'; 271 *ptr = '\0';
255 272
256 if (llength == 0) 273 if (llength == 0)
257 return NULL; 274 return NULL;
258 275
276 /*
277 * Now that we've read the header line, we can RFC 1522-decode it
278 */
279
280 ptr = uuscan_shlline;
281 ptr2 = uuscan_shlline2;
282
283 while (*ptr) {
284 /*
285 * Look for =? magic
286 */
287
288 if (*ptr == '=' && *(ptr+1) == '?') {
289 /*
290 * Let p1 point to the charset, look for next question mark
291 */
292
293 p1 = p2 = ptr+2;
294
295 while (*p2 && *p2 != '?') {
296 p2++;
297 }
298
299 if (*p2 == '?' &&
300 (*(p2+1) == 'q' || *(p2+1) == 'Q' ||
301 *(p2+1) == 'b' || *(p2+1) == 'B') &&
302 *(p2+2) == '?') {
303 /*
304 * Let p2 point to the encoding, look for ?= magic
305 */
306
307 p2++;
308 p3=p2+2;
309
310 while (*p3 && (*p3 != '?' || *(p3+1) != '=')) {
311 p3++;
312 }
313
314 if (*p3 == '?' && *(p3+1) == '=') {
315 /*
316 * Alright, we've found an RFC 1522 header field
317 */
318 if (*p2 == 'q' || *p2 == 'Q') {
319 c = UUDecodeField (p2+2, ptr2, QP_ENCODED);
320 }
321 else if (*p2 == 'b' || *p2 == 'B') {
322 c = UUDecodeField (p2+2, ptr2, B64ENCODED);
323 }
324 if (c >= 0) {
325 ptr2 += c;
326 ptr = p3+2;
327 continue;
328 }
329 }
330 }
331 }
332
333 *ptr2++ = *ptr++;
334 }
335
336 *ptr2 = 0;
337
259 return uuscan_shlline; 338 return uuscan_shlline2;
260} 339}
261 340
262/* 341/*
263 * Extract the value from a MIME attribute=value pair. This function 342 * Extract the value from a MIME attribute=value pair. This function
264 * receives a pointer to the attribute. 343 * receives a pointer to the attribute.
306 } 385 }
307 *ptr = '\0'; 386 *ptr = '\0';
308 } 387 }
309 else { 388 else {
310 /* tspecials from RFC1521 */ 389 /* tspecials from RFC1521 */
390 /*
391 * Note - exclude '[', ']' and ';' on popular request; these are
392 * used in some Content-Type fields by the Klez virus, and people
393 * who feed their virus scanners with the output of UUDeview would
394 * like to catch it!
395 */
311 396
312 while (*attribute && !isspace (*attribute) && 397 while (*attribute && !isspace (*attribute) &&
313 *attribute != '(' && *attribute != ')' && 398 *attribute != '(' && *attribute != ')' &&
314 *attribute != '<' && *attribute != '>' && 399 *attribute != '<' && *attribute != '>' &&
315 *attribute != '@' && *attribute != ',' && 400 *attribute != '@' && *attribute != ',' &&
316 *attribute != ';' && *attribute != ':' && 401 /* *attribute != ';' && */ *attribute != ':' &&
317 *attribute != '\\' &&*attribute != '"' && 402 *attribute != '\\' &&*attribute != '"' &&
318 *attribute != '/' && *attribute != '[' && 403 *attribute != '/' && /* *attribute != '[' &&
319 *attribute != ']' && *attribute != '?' && 404 *attribute != ']' && */ *attribute != '?' &&
320 *attribute != '=' && length < 255) 405 *attribute != '=' && length < 255)
321 *ptr++ = *attribute++; 406 *ptr++ = *attribute++;
322 407
323 *ptr = '\0'; 408 *ptr = '\0';
324 } 409 }
523 char *boundary, int ismime, int checkheaders, 608 char *boundary, int ismime, int checkheaders,
524 fileread *result) 609 fileread *result)
525{ 610{
526 char *line=uuscan_sdline, *bhds1=uuscan_sdbhds1, *bhds2=uuscan_sdbhds2; 611 char *line=uuscan_sdline, *bhds1=uuscan_sdbhds1, *bhds2=uuscan_sdbhds2;
527 static char *ptr, *p2, *p3=NULL, *bhdsp, bhl; 612 static char *ptr, *p2, *p3=NULL, *bhdsp, bhl;
528 int isb64[10], isuue[10], isxxe[10], isbhx[10], iscnt; 613 int islen[10], isb64[10], isuue[10], isxxe[10], isbhx[10], iscnt;
529 int cbb64, cbuue, cbxxe, cbbhx; 614 int cbb64, cbuue, cbxxe, cbbhx;
530 int bhflag=0, vflag, haddh=0, hadct=0; 615 int bhflag=0, vflag, haddh=0, hadct=0;
531 int bhrpc=0, bhnf=0, c, hcount, lcount, blen=0; 616 int bhrpc=0, bhnf=0, c, hcount, lcount, blen=0;
532 int encoding=0, dflag=0, ctline=42; 617 int encoding=0, dflag=0, ctline=42;
533 int dontcare=0, hadnl=0; 618 int dontcare=0, hadnl=0;
547 result->startpos = ftell (datei); 632 result->startpos = ftell (datei);
548 hcount = lcount = 0; 633 hcount = lcount = 0;
549 634
550 for (iscnt=0; iscnt<10; iscnt++) { 635 for (iscnt=0; iscnt<10; iscnt++) {
551 isb64[iscnt] = isuue[iscnt] = isxxe[iscnt] = isbhx[iscnt] = 0; 636 isb64[iscnt] = isuue[iscnt] = isxxe[iscnt] = isbhx[iscnt] = 0;
637 islen[iscnt] = -1;
552 } 638 }
553 639
554 iscnt = 0; 640 iscnt = 0;
555 641
556 if (boundary) 642 if (boundary)
864 * Detection for yEnc encoding 950 * Detection for yEnc encoding
865 */ 951 */
866 952
867 if (strncmp (line, "=ybegin ", 8) == 0 && 953 if (strncmp (line, "=ybegin ", 8) == 0 &&
868 _FP_strstr (line, " name=") != NULL) { 954 _FP_strstr (line, " name=") != NULL) {
869 if ((result->begin || result->end) && !uu_more_mime) { 955 if ((result->begin || result->end || result->uudet) && !uu_more_mime) {
870 fseek (datei, oldposition, SEEK_SET); 956 fseek (datei, oldposition, SEEK_SET);
871 break; 957 break;
872 } 958 }
873 959
874 /* 960 /*
937 if (strncmp (line, "=yend ", 6) == 0 && 1023 if (strncmp (line, "=yend ", 6) == 0 &&
938 result->uudet == YENC_ENCODED) { 1024 result->uudet == YENC_ENCODED) {
939 if (yepartends == 0 || yepartends >= yefilesize) { 1025 if (yepartends == 0 || yepartends >= yefilesize) {
940 result->end = 1; 1026 result->end = 1;
941 } 1027 }
1028#if 0
942 if (!uu_more_mime) 1029 if (!uu_more_mime)
943 break; 1030 break;
1031#endif
1032 continue;
944 } 1033 }
945 1034
946 /* 1035 /*
947 * if we haven't yet found anything encoded, try to find something 1036 * if we haven't yet found anything encoded, try to find something
948 */ 1037 */
957 1046
958 /* 1047 /*
959 * Check data against all possible encodings 1048 * Check data against all possible encodings
960 */ 1049 */
961 1050
1051 islen[iscnt%10] = strlen(line);
962 isb64[iscnt%10] = (UUValidData (line, B64ENCODED, &bhflag)==B64ENCODED); 1052 isb64[iscnt%10] = (UUValidData (line, B64ENCODED, &bhflag)==B64ENCODED);
963 isuue[iscnt%10] = (UUValidData (line, UU_ENCODED, &bhflag)==UU_ENCODED); 1053 isuue[iscnt%10] = (UUValidData (line, UU_ENCODED, &bhflag)==UU_ENCODED);
964 isxxe[iscnt%10] = (UUValidData (line, XX_ENCODED, &bhflag)==XX_ENCODED); 1054 isxxe[iscnt%10] = (UUValidData (line, XX_ENCODED, &bhflag)==XX_ENCODED);
965 isbhx[iscnt%10] = (UUValidData (line, BH_ENCODED, &bhflag)==BH_ENCODED); 1055 isbhx[iscnt%10] = (UUValidData (line, BH_ENCODED, &bhflag)==BH_ENCODED);
966 1056
1041 * We accept an encoding if it has been true for four consecutive 1131 * We accept an encoding if it has been true for four consecutive
1042 * lines. Check the is<enc> arrays to avoid mistaking one encoding 1132 * lines. Check the is<enc> arrays to avoid mistaking one encoding
1043 * for the other. Uuencoded data is rather easily mistaken for 1133 * for the other. Uuencoded data is rather easily mistaken for
1044 * Base 64. If the data matches more than one encoding, we need to 1134 * Base 64. If the data matches more than one encoding, we need to
1045 * scan further. 1135 * scan further.
1136 *
1137 * Since text can also rather easily be mistaken for UUencoded
1138 * data if it just happens to have 4 lines in a row that have the
1139 * correct first character for the length of the line, we also add
1140 * a check that the first 3 lines must be the same length, and the
1141 * 4th line must be less than or equal to that length. (since
1142 * uuencoders use the same length for all lines except the last,
1143 * this shouldn't increase the minimum size of UUdata we can
1144 * detect, as it would if we tested all 4 lines for being the same
1145 * length.) - Matthew Mueller, 20030109
1046 */ 1146 */
1047 1147
1048 if (iscnt > 3) { 1148 if (iscnt > 3) {
1049 cbb64 = (isb64[(iscnt-1)%10] && isb64[(iscnt-2)%10] && 1149 cbb64 = (isb64[(iscnt-1)%10] && isb64[(iscnt-2)%10] &&
1050 isb64[(iscnt-3)%10] && isb64[(iscnt-4)%10]); 1150 isb64[(iscnt-3)%10] && isb64[(iscnt-4)%10]);
1051 cbuue = (isuue[(iscnt-1)%10] && isuue[(iscnt-2)%10] && 1151 cbuue = (isuue[(iscnt-1)%10] && isuue[(iscnt-2)%10] &&
1052 isuue[(iscnt-3)%10] && isuue[(iscnt-4)%10]); 1152 isuue[(iscnt-3)%10] && isuue[(iscnt-4)%10] &&
1153 islen[(iscnt-1)%10] <= islen[(iscnt-2)%10] &&
1154 islen[(iscnt-2)%10] == islen[(iscnt-3)%10] &&
1155 islen[(iscnt-3)%10] == islen[(iscnt-4)%10]);
1053 cbxxe = (isxxe[(iscnt-1)%10] && isxxe[(iscnt-2)%10] && 1156 cbxxe = (isxxe[(iscnt-1)%10] && isxxe[(iscnt-2)%10] &&
1054 isxxe[(iscnt-3)%10] && isxxe[(iscnt-4)%10]); 1157 isxxe[(iscnt-3)%10] && isxxe[(iscnt-4)%10] &&
1158 islen[(iscnt-1)%10] <= islen[(iscnt-2)%10] &&
1159 islen[(iscnt-2)%10] == islen[(iscnt-3)%10] &&
1160 islen[(iscnt-3)%10] == islen[(iscnt-4)%10]);
1055 cbbhx = (isbhx[(iscnt-1)%10] && isbhx[(iscnt-2)%10] && 1161 cbbhx = (isbhx[(iscnt-1)%10] && isbhx[(iscnt-2)%10] &&
1056 isbhx[(iscnt-3)%10] && isbhx[(iscnt-4)%10]); 1162 isbhx[(iscnt-3)%10] && isbhx[(iscnt-4)%10]);
1057 } 1163 }
1058 else { 1164 else {
1059 cbb64 = cbuue = cbxxe = cbbhx = 0; 1165 cbb64 = cbuue = cbxxe = cbbhx = 0;
1413 preheaders = ftell (datei); 1519 preheaders = ftell (datei);
1414 while (!feof (datei)) { 1520 while (!feof (datei)) {
1415 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL(); 1521 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL();
1416 if (_FP_fgets (line, 255, datei) == NULL) 1522 if (_FP_fgets (line, 255, datei) == NULL)
1417 break; 1523 break;
1524 line[255] = '\0';
1418 if (!IsLineEmpty (line)) { 1525 if (!IsLineEmpty (line)) {
1419 fseek (datei, preheaders, SEEK_SET); 1526 fseek (datei, preheaders, SEEK_SET);
1420 line[255] = '\0';
1421 break; 1527 break;
1422 } 1528 }
1423 preheaders = ftell (datei); 1529 preheaders = ftell (datei);
1424 } 1530 }
1425 } 1531 }
1448 mssdepth--; 1554 mssdepth--;
1449 UUkillheaders (&(multistack[mssdepth].envelope)); 1555 UUkillheaders (&(multistack[mssdepth].envelope));
1450 _FP_free (multistack[mssdepth].source); 1556 _FP_free (multistack[mssdepth].source);
1451 } 1557 }
1452 1558
1559 prevpos = ftell (datei);
1453 if (_FP_fgets (line, 255, datei) == NULL) { 1560 if (_FP_fgets (line, 255, datei) == NULL) {
1454 _FP_free (result); 1561 _FP_free (result);
1455 return NULL; 1562 return NULL;
1456 } 1563 }
1457 line[255] = '\0'; 1564 line[255] = '\0';
1487 1594
1488 while (!feof (datei) && !IsLineEmpty (line)) { 1595 while (!feof (datei) && !IsLineEmpty (line)) {
1489 if (IsKnownHeader (line)) 1596 if (IsKnownHeader (line))
1490 hcount++; 1597 hcount++;
1491 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL(); 1598 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL();
1599 if (IsHeaderLine (line)) {
1492 ptr1 = ScanHeaderLine (datei, line); 1600 ptr1 = ScanHeaderLine (datei, line);
1493 if (ParseHeader (&sstate.envelope, ptr1) == NULL) { 1601 if (ParseHeader (&sstate.envelope, ptr1) == NULL) {
1494 *errcode = UURET_NOMEM; 1602 *errcode = UURET_NOMEM;
1495 _FP_free (result); 1603 _FP_free (result);
1496 return NULL; 1604 return NULL;
1605 }
1497 } 1606 }
1498 /* 1607 /*
1499 * if we've read too many lines without finding headers, then 1608 * if we've read too many lines without finding headers, then
1500 * this probably isn't a mail folder after all 1609 * this probably isn't a mail folder after all
1501 */ 1610 */
1502 lcount++; 1611 lcount++;
1503 if (lcount > WAITHEADER && hcount < hlcount.afternl) 1612 if (lcount > WAITHEADER && hcount < hlcount.afternl) {
1613 fseek (datei, prevpos, SEEK_SET);
1614 line[0] = '\0';
1504 break; 1615 break;
1616 }
1505 1617
1506 if (_FP_fgets (line, 255, datei) == NULL) 1618 if (_FP_fgets (line, 255, datei) == NULL)
1507 break; 1619 break;
1508 line[255] = '\0'; 1620 line[255] = '\0';
1509 } 1621 }
1622
1510 /* skip empty lines */ 1623 /* skip empty lines */
1511 prevpos = ftell (datei); 1624 prevpos = ftell (datei);
1625 if (IsLineEmpty (line)) {
1512 while (!feof (datei)) { 1626 while (!feof (datei)) {
1513 if (_FP_fgets (line, 255, datei) == NULL) 1627 if (_FP_fgets (line, 255, datei) == NULL)
1514 break; 1628 break;
1515 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL(); 1629 if (UUBUSYPOLL(ftell(datei),progress.fsize)) SPCANCEL();
1516 if (!IsLineEmpty (line)) { 1630 if (!IsLineEmpty (line)) {
1517 fseek (datei, prevpos, SEEK_SET); 1631 fseek (datei, prevpos, SEEK_SET);
1518 line[255] = '\0'; 1632 line[255] = '\0';
1519 break; 1633 break;
1520 } 1634 }
1521 prevpos = ftell (datei); 1635 prevpos = ftell (datei);
1522 } 1636 }
1637 }
1638
1523 /* 1639 /*
1524 * If we don't have all valid MIME headers yet, but the following 1640 * If we don't have all valid MIME headers yet, but the following
1525 * line is a MIME header, accept it anyway. 1641 * line is a MIME header, accept it anyway.
1526 */ 1642 */
1527 1643
1586 sstate.envelope.fname && sstate.envelope.ctenc) { 1702 sstate.envelope.fname && sstate.envelope.ctenc) {
1587 sstate.envelope.mimevers = _FP_strdup ("1.0"); 1703 sstate.envelope.mimevers = _FP_strdup ("1.0");
1588 hcount = hlcount.afternl; 1704 hcount = hlcount.afternl;
1589 } 1705 }
1590 1706
1591 if (hcount < hlcount.afternl) {
1592 /* not a folder after all */
1593 fseek (datei, preheaders, SEEK_SET);
1594 sstate.isfolder = 0;
1595 sstate.ismime = 0;
1596 }
1597 else if (sstate.envelope.mimevers != NULL) { 1707 if (sstate.envelope.mimevers != NULL) {
1598 /* this is a MIME file. check the Content-Type */ 1708 /* this is a MIME file. check the Content-Type */
1599 sstate.ismime = 1; 1709 sstate.ismime = 1;
1600 if (_FP_stristr (sstate.envelope.ctype, "multipart") != NULL) { 1710 if (_FP_stristr (sstate.envelope.ctype, "multipart") != NULL) {
1601 if (sstate.envelope.boundary == NULL) { 1711 if (sstate.envelope.boundary == NULL) {
1602 UUMessage (uuscan_id, __LINE__, UUMSG_WARNING, 1712 UUMessage (uuscan_id, __LINE__, UUMSG_WARNING,
1610 } 1720 }
1611 } 1721 }
1612 else { 1722 else {
1613 sstate.mimestate = MS_BODY; /* just a `simple' message */ 1723 sstate.mimestate = MS_BODY; /* just a `simple' message */
1614 } 1724 }
1725 }
1726 else {
1727 /* not a folder after all */
1728 fseek (datei, prevpos, SEEK_SET);
1729 sstate.isfolder = 0;
1730 sstate.ismime = 0;
1615 } 1731 }
1616 } 1732 }
1617 1733
1618 if (feof (datei) || ferror (datei)) { /* oops */ 1734 if (feof (datei) || ferror (datei)) { /* oops */
1619 _FP_free (result); 1735 _FP_free (result);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines