1 | package Convert::UUlib; |
1 | package Convert::UUlib; |
2 | |
2 | |
3 | no warnings; |
3 | use common::sense; |
4 | use strict; |
|
|
5 | |
4 | |
6 | use Carp; |
5 | use Carp; |
7 | |
6 | |
8 | require Exporter; |
7 | require Exporter; |
9 | require DynaLoader; |
8 | require DynaLoader; |
10 | |
9 | |
11 | our $VERSION = 1.62; |
10 | our $VERSION = 1.8; |
12 | |
11 | |
13 | our @ISA = qw(Exporter DynaLoader); |
12 | our @ISA = qw(Exporter DynaLoader); |
14 | |
13 | |
15 | our @_consts = qw( |
14 | our @_consts = qw( |
16 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
15 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
… | |
… | |
48 | our @EXPORT_OK = @_funcs; |
47 | our @EXPORT_OK = @_funcs; |
49 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
48 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
50 | |
49 | |
51 | bootstrap Convert::UUlib $VERSION; |
50 | bootstrap Convert::UUlib $VERSION; |
52 | |
51 | |
53 | Initialize(); |
52 | # dummy function for compatibility with pre-1.7 versions |
54 | |
53 | sub Initialize { } |
55 | # not when < 5.005_6x |
|
|
56 | # END { CleanUp() } |
|
|
57 | |
|
|
58 | for (@_consts) { |
|
|
59 | my $constant = constant($_); |
|
|
60 | no strict 'refs'; |
|
|
61 | *$_ = sub () { $constant }; |
|
|
62 | } |
|
|
63 | |
54 | |
64 | # action code -> string mapping |
55 | # action code -> string mapping |
65 | sub straction($) { |
56 | sub straction($) { |
66 | return 'copying' if $_[0] == &ACT_COPYING; |
57 | return 'copying' if $_[0] == &ACT_COPYING; |
67 | return 'decoding' if $_[0] == &ACT_DECODING; |
58 | return 'decoding' if $_[0] == &ACT_DECODING; |
… | |
… | |
96 | 1; |
87 | 1; |
97 | __END__ |
88 | __END__ |
98 | |
89 | |
99 | =head1 NAME |
90 | =head1 NAME |
100 | |
91 | |
101 | Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview). |
92 | Convert::UUlib - decode uu/xx/b64/mime/yenc/etc-encoded data from a massive number of files |
102 | |
93 | |
103 | =head1 SYNOPSIS |
94 | =head1 SYNOPSIS |
104 | |
95 | |
105 | use Convert::UUlib ':all'; |
96 | use Convert::UUlib ':all'; |
106 | |
97 | |
107 | # read all the files named on the commandline and decode them |
98 | # read all the files named on the commandline and decode them |
108 | # into the CURRENT directory. See below for a longer example. |
99 | # into the CURRENT directory. See below for a longer example. |
109 | LoadFile $_ for @ARGV; |
100 | LoadFile $_ for @ARGV; |
|
|
101 | |
110 | for my $uu (GetFileList) { |
102 | for my $uu (GetFileList) { |
111 | if ($uu->state & FILE_OK) { |
103 | if ($uu->state & FILE_OK) { |
112 | $uu->decode; |
104 | $uu->decode; |
113 | print $uu->filename, "\n"; |
105 | print $uu->filename, "\n"; |
114 | } |
106 | } |
115 | } |
107 | } |
116 | |
108 | |
117 | =head1 DESCRIPTION |
109 | =head1 DESCRIPTION |
118 | |
110 | |
|
|
111 | This module started as an interface to the uulib/uudeview library by Frank |
|
|
112 | Pilhofer that can be used to decode all kinds of usenet (and other) |
|
|
113 | binary messages. |
|
|
114 | |
|
|
115 | After upstream abandoned the project, the library was continuously bugfixed |
|
|
116 | and improved in this module, with major focuses on security fixes, |
|
|
117 | correctness and speed (that does not mean that this library is considered |
|
|
118 | safe with untrusted data, but it surely is safer than the original |
|
|
119 | uudeview). |
|
|
120 | |
119 | Read the file doc/library.pdf from the distribution for in-depth |
121 | Read the file doc/library.pdf from the distribution for in-depth |
120 | information about the C-library used in this interface, and the rest of |
122 | information about the C-library used in this interface, and the rest of |
121 | this document and especially the non-trivial decoder program at the end. |
123 | this document and especially the non-trivial decoder program at the end. |
122 | |
124 | |
123 | =head1 EXPORTED CONSTANTS |
125 | =head1 EXPORTED CONSTANTS |
… | |
… | |
142 | =head2 Options |
144 | =head2 Options |
143 | |
145 | |
144 | OPT_VERSION version number MAJOR.MINORplPATCH (ro) |
146 | OPT_VERSION version number MAJOR.MINORplPATCH (ro) |
145 | OPT_FAST assumes only one part per file |
147 | OPT_FAST assumes only one part per file |
146 | OPT_DUMBNESS switch off the program's intelligence |
148 | OPT_DUMBNESS switch off the program's intelligence |
147 | OPT_BRACKPOL give numbers in [] higher precendence |
149 | OPT_BRACKPOL give numbers in [] higher precedence |
148 | OPT_VERBOSE generate informative messages |
150 | OPT_VERBOSE generate informative messages |
149 | OPT_DESPERATE try to decode incomplete files |
151 | OPT_DESPERATE try to decode incomplete files |
150 | OPT_IGNREPLY ignore RE:plies (off by default) |
152 | OPT_IGNREPLY ignore RE:plies (off by default) |
151 | OPT_OVERWRITE whether it's OK to overwrite ex. files |
153 | OPT_OVERWRITE whether it's OK to overwrite ex. files |
152 | OPT_SAVEPATH prefix to save-files on disk |
154 | OPT_SAVEPATH prefix to save-files on disk |
… | |
… | |
216 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
218 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
217 | RSS to decode 20000 files. |
219 | RSS to decode 20000 files. |
218 | |
220 | |
219 | =over |
221 | =over |
220 | |
222 | |
221 | =item Initialize |
|
|
222 | |
|
|
223 | Not normally necessary, (re-)initializes the library. |
|
|
224 | |
|
|
225 | =item CleanUp |
223 | =item CleanUp |
226 | |
224 | |
227 | Not normally necessary, could be called at the end to release memory |
225 | Release memory, file items and clean up files. Should be called after a |
228 | before starting a new decoding round. |
226 | decoding run, if you want to start a new one. |
229 | |
227 | |
230 | =back |
228 | =back |
231 | |
229 | |
232 | =head2 Setting and querying options |
230 | =head2 Setting and querying options |
233 | |
231 | |
… | |
… | |
304 | if you want to iterate over all items, it is usually faster to use |
302 | if you want to iterate over all items, it is usually faster to use |
305 | C<GetFileList>. |
303 | C<GetFileList>. |
306 | |
304 | |
307 | =item @items = GetFileList |
305 | =item @items = GetFileList |
308 | |
306 | |
309 | Similar to C<GetFileListItem>, but returns all files in one go. |
307 | Similar to C<GetFileListItem>, but returns all files in one go, which is |
|
|
308 | very much faster for large numbers of items, and has no drawbacks when used |
|
|
309 | for a small number of items. |
310 | |
310 | |
311 | =back |
311 | =back |
312 | |
312 | |
313 | =head2 Decoding files |
313 | =head2 Decoding files |
314 | |
314 | |
… | |
… | |
448 | |
448 | |
449 | =back |
449 | =back |
450 | |
450 | |
451 | =head1 LARGE EXAMPLE DECODER |
451 | =head1 LARGE EXAMPLE DECODER |
452 | |
452 | |
|
|
453 | The general workflow for decoding is like this: |
|
|
454 | |
|
|
455 | =over |
|
|
456 | |
|
|
457 | =item 1. Configure options with C<SetOption> or C<SetXXXCallback>. |
|
|
458 | |
|
|
459 | =item 2. Load all source files with C<LoadFile>. |
|
|
460 | |
|
|
461 | =item 3. Optionally C<Smerge>. |
|
|
462 | |
|
|
463 | =item 4. Iterate over all C<GetFileList> items (i.e. result files). |
|
|
464 | |
|
|
465 | =item 5. C<CleanUp> to delete files and free items. |
|
|
466 | |
|
|
467 | =back |
|
|
468 | |
453 | This is the file C<example-decoder> from the distribution, put here |
469 | What follows is the file C<example-decoder> from the distribution that |
454 | instead of more thorough documentation. |
470 | illustrates the above workflow in a non-trivial example. |
455 | |
471 | |
456 | #!/usr/bin/perl |
472 | #!/usr/bin/perl |
457 | |
473 | |
458 | # decode all the files in the directory uusrc/ and copy |
474 | # decode all the files in the directory uusrc/ and copy |
459 | # the resulting files to uudst/ |
475 | # the resulting files to uudst/ |
… | |
… | |
478 | SetOption OPT_RBUF, 128*1024; |
494 | SetOption OPT_RBUF, 128*1024; |
479 | SetOption OPT_WBUF, 1024*1024; |
495 | SetOption OPT_WBUF, 1024*1024; |
480 | SetOption OPT_IGNMODE, 1; |
496 | SetOption OPT_IGNMODE, 1; |
481 | SetOption OPT_IGNMODE, 1; |
497 | SetOption OPT_IGNMODE, 1; |
482 | SetOption OPT_VERBOSE, 1; |
498 | SetOption OPT_VERBOSE, 1; |
|
|
499 | SetOption OPT_AUTOCHK, 0; |
483 | |
500 | |
484 | # show the three ways you can set callback functions. I normally |
501 | # show the three ways you can set callback functions. I normally |
485 | # prefer the one with the sub inplace. |
502 | # prefer the one with the sub inplace. |
486 | SetFNameFilter \&namefilter; |
503 | SetFNameFilter \&namefilter; |
487 | |
504 | |
… | |
… | |
525 | # now read all files in the directory uusrc/* |
542 | # now read all files in the directory uusrc/* |
526 | for (<uusrc/*>) { |
543 | for (<uusrc/*>) { |
527 | my ($retval, $count) = LoadFile ($_, $_, 1); |
544 | my ($retval, $count) = LoadFile ($_, $_, 1); |
528 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
545 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
529 | } |
546 | } |
|
|
547 | |
|
|
548 | Smerge -1; |
530 | |
549 | |
531 | SetOption OPT_SAVEPATH, "uudst/"; |
550 | SetOption OPT_SAVEPATH, "uudst/"; |
532 | |
551 | |
533 | # now wade through all files and their source parts |
552 | # now wade through all files and their source parts |
534 | for my $uu (GetFileList) { |
553 | for my $uu (GetFileList) { |
… | |
… | |
583 | Future versions might enable multicore support for more functions. |
602 | Future versions might enable multicore support for more functions. |
584 | |
603 | |
585 | =head1 BUGS AND LIMITATIONS |
604 | =head1 BUGS AND LIMITATIONS |
586 | |
605 | |
587 | The original uulib library this module uses was written at a time where |
606 | The original uulib library this module uses was written at a time where |
588 | main memory of measured in megabytes and buffer overflows as a security |
607 | main memory was measured in megabytes and buffer overflows as a security |
589 | thign didn't exist. While a lot of security fixes have been applied over |
608 | thing didn't exist. While a lot of security fixes have been applied over |
590 | the years (includign some defense in depth mechanism that can shield |
609 | the years (including some defense in depth mechanism that can shield |
591 | against a lot of as-of-yet undetected bugs), using this library for |
610 | against a lot of as-of-yet undetected bugs), using this library for |
592 | security purposes requires care. |
611 | security purposes requires care. |
593 | |
612 | |
594 | Likewise, file sizes when the uulib library was written were tiny compared |
613 | Likewise, file sizes when the uulib library was written were tiny compared |
595 | to today, so do not expect this library to handle files larger than 2GB. |
614 | to today, so do not expect this library to handle files larger than 2GB, |
|
|
615 | certainly not on a 32 bit host. |
|
|
616 | |
|
|
617 | Lastly, this module uses a very "C-like" interface, which means it doesn't |
|
|
618 | protect you from invalid pointers as you might expect from "more perlish" |
|
|
619 | modules - for example, accessing a file item object after calling |
|
|
620 | C<CleanUp> will likely result in crashes, memory corruption, or worse. |
596 | |
621 | |
597 | =head1 AUTHOR |
622 | =head1 AUTHOR |
598 | |
623 | |
599 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
624 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
600 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
625 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |