1 | package Convert::UUlib; |
1 | package Convert::UUlib; |
2 | |
2 | |
3 | no warnings; |
3 | use common::sense; |
4 | use strict; |
|
|
5 | |
4 | |
6 | use Carp; |
5 | use Carp; |
7 | |
6 | |
8 | require Exporter; |
7 | require Exporter; |
9 | require DynaLoader; |
8 | require DynaLoader; |
10 | |
9 | |
11 | our $VERSION = 1.62; |
10 | our $VERSION = 1.8; |
12 | |
11 | |
13 | our @ISA = qw(Exporter DynaLoader); |
12 | our @ISA = qw(Exporter DynaLoader); |
14 | |
13 | |
15 | our @_consts = qw( |
14 | our @_consts = qw( |
16 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
15 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
… | |
… | |
34 | ); |
33 | ); |
35 | |
34 | |
36 | our @_funcs = qw( |
35 | our @_funcs = qw( |
37 | Initialize CleanUp GetOption SetOption strerror SetMsgCallback |
36 | Initialize CleanUp GetOption SetOption strerror SetMsgCallback |
38 | SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback |
37 | SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback |
39 | FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp |
38 | FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp |
40 | RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti |
39 | RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti |
41 | EncodePartial EncodeToStream EncodeToFile E_PrepSingle |
40 | EncodePartial EncodeToStream EncodeToFile E_PrepSingle |
42 | E_PrepPartial |
41 | E_PrepPartial |
43 | |
42 | |
44 | straction strencoding strmsglevel |
43 | straction strencoding strmsglevel |
… | |
… | |
48 | our @EXPORT_OK = @_funcs; |
47 | our @EXPORT_OK = @_funcs; |
49 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
48 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
50 | |
49 | |
51 | bootstrap Convert::UUlib $VERSION; |
50 | bootstrap Convert::UUlib $VERSION; |
52 | |
51 | |
53 | Initialize(); |
52 | # dummy function for compatibility with pre-1.7 versions |
54 | |
53 | sub Initialize { } |
55 | # not when < 5.005_6x |
|
|
56 | # END { CleanUp() } |
|
|
57 | |
|
|
58 | for (@_consts) { |
|
|
59 | my $constant = constant($_); |
|
|
60 | no strict 'refs'; |
|
|
61 | *$_ = sub () { $constant }; |
|
|
62 | } |
|
|
63 | |
54 | |
64 | # action code -> string mapping |
55 | # action code -> string mapping |
65 | sub straction($) { |
56 | sub straction($) { |
66 | return 'copying' if $_[0] == &ACT_COPYING; |
57 | return 'copying' if $_[0] == &ACT_COPYING; |
67 | return 'decoding' if $_[0] == &ACT_DECODING; |
58 | return 'decoding' if $_[0] == &ACT_DECODING; |
… | |
… | |
96 | 1; |
87 | 1; |
97 | __END__ |
88 | __END__ |
98 | |
89 | |
99 | =head1 NAME |
90 | =head1 NAME |
100 | |
91 | |
101 | Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview). |
92 | Convert::UUlib - decode uu/xx/b64/mime/yenc/etc-encoded data from a massive number of files |
102 | |
93 | |
103 | =head1 SYNOPSIS |
94 | =head1 SYNOPSIS |
104 | |
95 | |
105 | use Convert::UUlib ':all'; |
96 | use Convert::UUlib ':all'; |
106 | |
97 | |
107 | # read all the files named on the commandline and decode them |
98 | # read all the files named on the commandline and decode them |
108 | # into the CURRENT directory. See below for a longer example. |
99 | # into the CURRENT directory. See below for a longer example. |
109 | LoadFile $_ for @ARGV; |
100 | LoadFile $_ for @ARGV; |
110 | for (my $i = 0; my $uu = GetFileListItem $i; $i++) { |
101 | |
|
|
102 | for my $uu (GetFileList) { |
111 | if ($uu->state & FILE_OK) { |
103 | if ($uu->state & FILE_OK) { |
112 | $uu->decode; |
104 | $uu->decode; |
113 | print $uu->filename, "\n"; |
105 | print $uu->filename, "\n"; |
114 | } |
106 | } |
115 | } |
107 | } |
116 | |
108 | |
117 | =head1 DESCRIPTION |
109 | =head1 DESCRIPTION |
|
|
110 | |
|
|
111 | This module started as an interface to the uulib/uudeview library by Frank |
|
|
112 | Pilhofer that can be used to decode all kinds of usenet (and other) |
|
|
113 | binary messages. |
|
|
114 | |
|
|
115 | After upstream abandoned the project, the library was continuously bugfixed |
|
|
116 | and improved in this module, with major focuses on security fixes, |
|
|
117 | correctness and speed (that does not mean that this library is considered |
|
|
118 | safe with untrusted data, but it surely is safer than the original |
|
|
119 | uudeview). |
118 | |
120 | |
119 | Read the file doc/library.pdf from the distribution for in-depth |
121 | Read the file doc/library.pdf from the distribution for in-depth |
120 | information about the C-library used in this interface, and the rest of |
122 | information about the C-library used in this interface, and the rest of |
121 | this document and especially the non-trivial decoder program at the end. |
123 | this document and especially the non-trivial decoder program at the end. |
122 | |
124 | |
… | |
… | |
216 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
218 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
217 | RSS to decode 20000 files. |
219 | RSS to decode 20000 files. |
218 | |
220 | |
219 | =over |
221 | =over |
220 | |
222 | |
221 | =item Initialize |
|
|
222 | |
|
|
223 | Not normally necessary, (re-)initializes the library. |
|
|
224 | |
|
|
225 | =item CleanUp |
223 | =item CleanUp |
226 | |
224 | |
227 | Not normally necessary, could be called at the end to release memory |
225 | Release memory, file items and clean up files. Should be called after a |
228 | before starting a new decoding round. |
226 | decoding run, if you want to start a new one. |
229 | |
227 | |
230 | =back |
228 | =back |
231 | |
229 | |
232 | =head2 Setting and querying options |
230 | =head2 Setting and querying options |
233 | |
231 | |
… | |
… | |
297 | C<undef> if no file with that number exists. |
295 | C<undef> if no file with that number exists. |
298 | |
296 | |
299 | The first file has number C<0>, and the series has no holes, so you can |
297 | The first file has number C<0>, and the series has no holes, so you can |
300 | iterate over all files by starting with zero and incrementing until you |
298 | iterate over all files by starting with zero and incrementing until you |
301 | hit C<undef>. |
299 | hit C<undef>. |
|
|
300 | |
|
|
301 | This function has to walk the linear list of files on each access, so |
|
|
302 | if you want to iterate over all items, it is usually faster to use |
|
|
303 | C<GetFileList>. |
|
|
304 | |
|
|
305 | =item @items = GetFileList |
|
|
306 | |
|
|
307 | Similar to C<GetFileListItem>, but returns all files in one go, which is |
|
|
308 | very much faster for a large number of items, and has no drawbacks when used |
|
|
309 | for a small number of items. |
302 | |
310 | |
303 | =back |
311 | =back |
304 | |
312 | |
305 | =head2 Decoding files |
313 | =head2 Decoding files |
306 | |
314 | |
… | |
… | |
440 | |
448 | |
441 | =back |
449 | =back |
442 | |
450 | |
443 | =head1 LARGE EXAMPLE DECODER |
451 | =head1 LARGE EXAMPLE DECODER |
444 | |
452 | |
|
|
453 | The general workflow for decoding is like this: |
|
|
454 | |
|
|
455 | =over |
|
|
456 | |
|
|
457 | =item 1. Configure options with C<SetOption> or C<SetXXXCallback>. |
|
|
458 | |
|
|
459 | =item 2. Load all source files with C<LoadFile>. |
|
|
460 | |
|
|
461 | =item 3. Optionally C<Smerge>. |
|
|
462 | |
|
|
463 | =item 4. Iterate over all C<GetFileList> items (i.e. result files). |
|
|
464 | |
|
|
465 | =item 5. C<CleanUp> to delete files and free items. |
|
|
466 | |
|
|
467 | =back |
|
|
468 | |
445 | This is the file C<example-decoder> from the distribution, put here |
469 | What follows is the file C<example-decoder> from the distribution that |
446 | instead of more thorough documentation. |
470 | illustrates the above workflow in a non-trivial example. |
447 | |
471 | |
448 | #!/usr/bin/perl |
472 | #!/usr/bin/perl |
449 | |
473 | |
450 | # decode all the files in the directory uusrc/ and copy |
474 | # decode all the files in the directory uusrc/ and copy |
451 | # the resulting files to uudst/ |
475 | # the resulting files to uudst/ |
… | |
… | |
470 | SetOption OPT_RBUF, 128*1024; |
494 | SetOption OPT_RBUF, 128*1024; |
471 | SetOption OPT_WBUF, 1024*1024; |
495 | SetOption OPT_WBUF, 1024*1024; |
472 | SetOption OPT_IGNMODE, 1; |
496 | SetOption OPT_IGNMODE, 1; |
473 | SetOption OPT_IGNMODE, 1; |
497 | SetOption OPT_IGNMODE, 1; |
474 | SetOption OPT_VERBOSE, 1; |
498 | SetOption OPT_VERBOSE, 1; |
|
|
499 | SetOption OPT_AUTOCHK, 0; |
475 | |
500 | |
476 | # show the three ways you can set callback functions. I normally |
501 | # show the three ways you can set callback functions. I normally |
477 | # prefer the one with the sub inplace. |
502 | # prefer the one with the sub inplace. |
478 | SetFNameFilter \&namefilter; |
503 | SetFNameFilter \&namefilter; |
479 | |
504 | |
… | |
… | |
513 | # otherwise just pass what we have |
538 | # otherwise just pass what we have |
514 | () |
539 | () |
515 | }; |
540 | }; |
516 | |
541 | |
517 | # now read all files in the directory uusrc/* |
542 | # now read all files in the directory uusrc/* |
518 | for(<uusrc/*>) { |
543 | for (<uusrc/*>) { |
519 | my ($retval, $count) = LoadFile ($_, $_, 1); |
544 | my ($retval, $count) = LoadFile ($_, $_, 1); |
520 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
545 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
521 | } |
546 | } |
522 | |
547 | |
|
|
548 | Smerge -1; |
|
|
549 | |
523 | SetOption OPT_SAVEPATH, "uudst/"; |
550 | SetOption OPT_SAVEPATH, "uudst/"; |
524 | |
551 | |
525 | # now wade through all files and their source parts |
552 | # now wade through all files and their source parts |
526 | $i = 0; |
553 | for my $uu (GetFileList) { |
527 | while ($uu = GetFileListItem $i) { |
554 | print "file ", $uu->filename, "\n"; |
528 | $i++; |
|
|
529 | print "file nr. $i"; |
|
|
530 | print " state ", $uu->state; |
555 | print " state ", $uu->state, "\n"; |
531 | print " mode ", $uu->mode; |
556 | print " mode ", $uu->mode, "\n"; |
532 | print " uudet ", strencoding $uu->uudet; |
557 | print " uudet ", strencoding $uu->uudet, "\n"; |
533 | print " size ", $uu->size; |
558 | print " size ", $uu->size, "\n"; |
534 | print " filename ", $uu->filename; |
|
|
535 | print " subfname ", $uu->subfname; |
559 | print " subfname ", $uu->subfname, "\n"; |
536 | print " mimeid ", $uu->mimeid; |
560 | print " mimeid ", $uu->mimeid, "\n"; |
537 | print " mimetype ", $uu->mimetype; |
561 | print " mimetype ", $uu->mimetype, "\n"; |
538 | print "\n"; |
|
|
539 | |
562 | |
540 | # print additional info about all parts |
563 | # print additional info about all parts |
|
|
564 | print " parts"; |
541 | for ($uu->parts) { |
565 | for ($uu->parts) { |
542 | while (my ($k, $v) = each %$_) { |
566 | for my $k (sort keys %$_) { |
543 | print "$k > $v, "; |
567 | print " $k=$_->{$k}"; |
544 | } |
568 | } |
545 | print "\n"; |
569 | print "\n"; |
546 | } |
570 | } |
547 | |
571 | |
548 | print $uu->filename; |
|
|
549 | |
|
|
550 | $uu->remove_temp; |
572 | $uu->remove_temp; |
551 | |
573 | |
552 | if (my $err = $uu->decode ()) { |
574 | if (my $err = $uu->decode) { |
553 | print ", ", strerror $err, "\n"; |
575 | print " ERROR ", strerror $err, "\n"; |
554 | } else { |
576 | } else { |
555 | print ", saved as uudst/", $uu->filename, "\n"; |
577 | print " successfully saved as uudst/", $uu->filename, "\n"; |
556 | } |
578 | } |
557 | } |
579 | } |
558 | |
580 | |
559 | print "cleanup...\n"; |
581 | print "cleanup...\n"; |
560 | |
582 | |
… | |
… | |
587 | the years (including some defense-in-depth mechanisms that can shield |
609 | the years (including some defense-in-depth mechanisms that can shield |
588 | against a lot of as-of-yet undetected bugs), using this library for |
610 | against a lot of as-of-yet undetected bugs), using this library for |
589 | security purposes requires care. |
611 | security purposes requires care. |
590 | |
612 | |
591 | Likewise, file sizes when the uulib library was written were tiny compared |
613 | Likewise, file sizes when the uulib library was written were tiny compared |
592 | to today, so do not expect this library to handle files larger than 2GB. |
614 | to today, so do not expect this library to handle files larger than 2GB, |
|
|
615 | certainly not on a 32 bit host. |
|
|
616 | |
|
|
617 | Lastly, this module uses a very "C-like" interface, which means it doesn't |
|
|
618 | protect you from invalid pointers as you might expect from "more perlish" |
|
|
619 | modules - for example, accessing a file item object after calling |
|
|
620 | C<CleanUp> will likely result in crashes, memory corruption, or worse. |
593 | |
621 | |
594 | =head1 AUTHOR |
622 | =head1 AUTHOR |
595 | |
623 | |
596 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
624 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
597 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
625 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |