1 | package Convert::UUlib; |
1 | package Convert::UUlib; |
2 | |
2 | |
3 | no warnings; |
3 | use common::sense; |
4 | use strict; |
|
|
5 | |
4 | |
6 | use Carp; |
5 | use Carp; |
7 | |
6 | |
8 | require Exporter; |
7 | require Exporter; |
9 | require DynaLoader; |
8 | require DynaLoader; |
10 | |
9 | |
11 | our $VERSION = 1.5; |
10 | our $VERSION = 1.71; |
12 | |
11 | |
13 | our @ISA = qw(Exporter DynaLoader); |
12 | our @ISA = qw(Exporter DynaLoader); |
14 | |
13 | |
15 | our @_consts = qw( |
14 | our @_consts = qw( |
16 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
15 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
… | |
… | |
34 | ); |
33 | ); |
35 | |
34 | |
36 | our @_funcs = qw( |
35 | our @_funcs = qw( |
37 | Initialize CleanUp GetOption SetOption strerror SetMsgCallback |
36 | Initialize CleanUp GetOption SetOption strerror SetMsgCallback |
38 | SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback |
37 | SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback |
39 | FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp |
38 | FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp |
40 | RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti |
39 | RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti |
41 | EncodePartial EncodeToStream EncodeToFile E_PrepSingle |
40 | EncodePartial EncodeToStream EncodeToFile E_PrepSingle |
42 | E_PrepPartial |
41 | E_PrepPartial |
43 | |
42 | |
44 | straction strencoding strmsglevel |
43 | straction strencoding strmsglevel |
… | |
… | |
48 | our @EXPORT_OK = @_funcs; |
47 | our @EXPORT_OK = @_funcs; |
49 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
48 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
50 | |
49 | |
51 | bootstrap Convert::UUlib $VERSION; |
50 | bootstrap Convert::UUlib $VERSION; |
52 | |
51 | |
53 | Initialize(); |
52 | # dummy function for compatibility with pre-1.7 versions |
54 | |
53 | sub Initialize { } |
55 | # not when < 5.005_6x |
|
|
56 | # END { CleanUp() } |
|
|
57 | |
|
|
58 | for (@_consts) { |
|
|
59 | my $constant = constant($_); |
|
|
60 | no strict 'refs'; |
|
|
61 | *$_ = sub () { $constant }; |
|
|
62 | } |
|
|
63 | |
54 | |
64 | # action code -> string mapping |
55 | # action code -> string mapping |
65 | sub straction($) { |
56 | sub straction($) { |
66 | return 'copying' if $_[0] == &ACT_COPYING; |
57 | return 'copying' if $_[0] == &ACT_COPYING; |
67 | return 'decoding' if $_[0] == &ACT_DECODING; |
58 | return 'decoding' if $_[0] == &ACT_DECODING; |
… | |
… | |
96 | 1; |
87 | 1; |
97 | __END__ |
88 | __END__ |
98 | |
89 | |
99 | =head1 NAME |
90 | =head1 NAME |
100 | |
91 | |
101 | Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview). |
92 | Convert::UUlib - decode uu/xx/b64/mime/yenc/etc-encoded data from a massive number of files |
102 | |
93 | |
103 | =head1 SYNOPSIS |
94 | =head1 SYNOPSIS |
104 | |
95 | |
105 | use Convert::UUlib ':all'; |
96 | use Convert::UUlib ':all'; |
106 | |
97 | |
107 | # read all the files named on the commandline and decode them |
98 | # read all the files named on the commandline and decode them |
108 | # into the CURRENT directory. See below for a longer example. |
99 | # into the CURRENT directory. See below for a longer example. |
109 | LoadFile $_ for @ARGV; |
100 | LoadFile $_ for @ARGV; |
110 | for (my $i = 0; my $uu = GetFileListItem $i; $i++) { |
101 | |
|
|
102 | for my $uu (GetFileList) { |
111 | if ($uu->state & FILE_OK) { |
103 | if ($uu->state & FILE_OK) { |
112 | $uu->decode; |
104 | $uu->decode; |
113 | print $uu->filename, "\n"; |
105 | print $uu->filename, "\n"; |
114 | } |
106 | } |
115 | } |
107 | } |
116 | |
108 | |
117 | =head1 DESCRIPTION |
109 | =head1 DESCRIPTION |
|
|
110 | |
|
|
111 | This module started as an interface to the uulib/uudeview library by Frank |
|
|
112 | Pilhofer that can be used to decode all kinds of usenet (and other) |
|
|
113 | binary messages. |
|
|
114 | |
|
|
115 | After upstream abandoned the project, the library was continuously bugfixed |
|
|
116 | and improved in this module, with major focuses on security fixes, |
|
|
117 | correctness and speed (that does not mean that this library is considered |
|
|
118 | safe with untrusted data, but it surely is safer than the original |
|
|
119 | uudeview). |
118 | |
120 | |
119 | Read the file doc/library.pdf from the distribution for in-depth |
121 | Read the file doc/library.pdf from the distribution for in-depth |
120 | information about the C-library used in this interface, and the rest of |
122 | information about the C-library used in this interface, and the rest of |
121 | this document and especially the non-trivial decoder program at the end. |
123 | this document and especially the non-trivial decoder program at the end. |
122 | |
124 | |
… | |
… | |
214 | again. |
216 | again. |
215 | |
217 | |
216 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
218 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
217 | RSS to decode 20000 files. |
219 | RSS to decode 20000 files. |
218 | |
220 | |
219 | =over 4 |
221 | =over |
220 | |
|
|
221 | =item Initialize |
|
|
222 | |
|
|
223 | Not normally necessary, (re-)initializes the library. |
|
|
224 | |
222 | |
225 | =item CleanUp |
223 | =item CleanUp |
226 | |
224 | |
227 | Not normally necessary, could be called at the end to release memory |
225 | Release memory, file items and clean up files. Should be called after a |
228 | before starting a new decoding round. |
226 | decoding run, if you want to start a new one. |
229 | |
227 | |
230 | =back |
228 | =back |
231 | |
229 | |
232 | =head2 Setting and querying options |
230 | =head2 Setting and querying options |
233 | |
231 | |
234 | =over 4 |
232 | =over |
235 | |
233 | |
236 | =item $option = GetOption OPT_xxx |
234 | =item $option = GetOption OPT_xxx |
237 | |
235 | |
238 | =item SetOption OPT_xxx, opt-value |
236 | =item SetOption OPT_xxx, opt-value |
239 | |
237 | |
… | |
… | |
241 | |
239 | |
242 | See the C<OPT_xxx> constants above to see which options exist. |
240 | See the C<OPT_xxx> constants above to see which options exist. |
243 | |
241 | |
244 | =head2 Setting various callbacks |
242 | =head2 Setting various callbacks |
245 | |
243 | |
246 | =over 4 |
244 | =over |
247 | |
245 | |
248 | =item SetMsgCallback [callback-function] |
246 | =item SetMsgCallback [callback-function] |
249 | |
247 | |
250 | =item SetBusyCallback [callback-function] |
248 | =item SetBusyCallback [callback-function] |
251 | |
249 | |
… | |
… | |
255 | |
253 | |
256 | =back |
254 | =back |
257 | |
255 | |
258 | =head2 Call the currently selected FNameFilter |
256 | =head2 Call the currently selected FNameFilter |
259 | |
257 | |
260 | =over 4 |
258 | =over |
261 | |
259 | |
262 | =item $file = FNameFilter $file |
260 | =item $file = FNameFilter $file |
263 | |
261 | |
264 | =back |
262 | =back |
265 | |
263 | |
266 | =head2 Loading sourcefiles, optionally fuzzy merge and start decoding |
264 | =head2 Loading sourcefiles, optionally fuzzy merge and start decoding |
267 | |
265 | |
268 | =over 4 |
266 | =over |
269 | |
267 | |
270 | =item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] |
268 | =item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] |
271 | |
269 | |
272 | Load the given file and scan it for encoded contents. Optionally tag it |
270 | Load the given file and scan it for encoded contents. Optionally tag it |
273 | with the given id, and if C<$delflag> is true, delete the file after it |
271 | with the given id, and if C<$delflag> is true, delete the file after it |
… | |
… | |
298 | |
296 | |
299 | The first file has number C<0>, and the series has no holes, so you can |
297 | The first file has number C<0>, and the series has no holes, so you can |
300 | iterate over all files by starting with zero and incrementing until you |
298 | iterate over all files by starting with zero and incrementing until you |
301 | hit C<undef>. |
299 | hit C<undef>. |
302 | |
300 | |
|
|
301 | This function has to walk the linear list of files on each access, so |
|
|
302 | if you want to iterate over all items, it is usually faster to use |
|
|
303 | C<GetFileList>. |
|
|
304 | |
|
|
305 | =item @items = GetFileList |
|
|
306 | |
|
|
307 | Similar to C<GetFileListItem>, but returns all files in one go, which is |
|
|
308 | very much faster for a large number of items, and has no drawbacks when used |
|
|
309 | for a small number of items. |
|
|
310 | |
303 | =back |
311 | =back |
304 | |
312 | |
305 | =head2 Decoding files |
313 | =head2 Decoding files |
306 | |
314 | |
307 | =over 4 |
315 | =over |
308 | |
316 | |
309 | =item $retval = $item->rename($newname) |
317 | =item $retval = $item->rename ($newname) |
310 | |
318 | |
311 | Change the ondisk filename where the decoded file will be saved. |
319 | Change the ondisk filename where the decoded file will be saved. |
312 | |
320 | |
313 | =item $retval = $item->decode_temp |
321 | =item $retval = $item->decode_temp |
314 | |
322 | |
… | |
… | |
317 | |
325 | |
318 | =item $retval = $item->remove_temp |
326 | =item $retval = $item->remove_temp |
319 | |
327 | |
320 | Remove the temporarily decoded file again. |
328 | Remove the temporarily decoded file again. |
321 | |
329 | |
322 | =item $retval = $item->decode([$target_path]) |
330 | =item $retval = $item->decode ([$target_path]) |
323 | |
331 | |
324 | Decode the file to it's destination, or the given target path. |
332 | Decode the file to its destination, or the given target path. |
325 | |
333 | |
326 | =item $retval = $item->info(callback-function) |
334 | =item $retval = $item->info (callback-function) |
327 | |
335 | |
328 | =back |
336 | =back |
329 | |
337 | |
330 | =head2 Querying (and setting) item attributes |
338 | =head2 Querying (and setting) item attributes |
331 | |
339 | |
332 | =over 4 |
340 | =over |
333 | |
341 | |
334 | =item $state = $item->state |
342 | =item $state = $item->state |
335 | |
343 | |
336 | =item $mode = $item->mode([newmode]) |
344 | =item $mode = $item->mode ([newmode]) |
337 | |
345 | |
338 | =item $uudet = $item->uudet |
346 | =item $uudet = $item->uudet |
339 | |
347 | |
340 | =item $size = $item->size |
348 | =item $size = $item->size |
341 | |
349 | |
342 | =item $filename = $item->filename([newfilename]) |
350 | =item $filename = $item->filename ([newfilename]) |
343 | |
351 | |
344 | =item $subfname = $item->subfname |
352 | =item $subfname = $item->subfname |
345 | |
353 | |
346 | =item $mimeid = $item->mimeid |
354 | =item $mimeid = $item->mimeid |
347 | |
355 | |
… | |
… | |
351 | |
359 | |
352 | =back |
360 | =back |
353 | |
361 | |
354 | =head2 Information about source parts |
362 | =head2 Information about source parts |
355 | |
363 | |
356 | =over 4 |
364 | =over |
357 | |
365 | |
358 | =item $parts = $item->parts |
366 | =item $parts = $item->parts |
359 | |
367 | |
360 | Return information about all parts (source files) used to decode the file |
368 | Return information about all parts (source files) used to decode the file |
361 | as a list of hashrefs with the following structure: |
369 | as a list of hashrefs with the following structure: |
… | |
… | |
375 | Usually you are interested mostly in the C<sfname> and possibly the C<partno> |
383 | Usually you are interested mostly in the C<sfname> and possibly the C<partno> |
376 | and C<filename> members. |
384 | and C<filename> members. |
377 | |
385 | |
378 | =back |
386 | =back |
379 | |
387 | |
380 | =head2 Functions below not documented and not very well tested |
388 | =head2 Functions below are not documented and not very well tested - feedback welcome |
381 | |
389 | |
382 | QuickDecode |
390 | QuickDecode |
383 | EncodeMulti |
391 | EncodeMulti |
384 | EncodePartial |
392 | EncodePartial |
385 | EncodeToStream |
393 | EncodeToStream |
… | |
… | |
389 | |
397 | |
390 | =head2 EXTENSION FUNCTIONS |
398 | =head2 EXTENSION FUNCTIONS |
391 | |
399 | |
392 | Functions found in this module but not documented in the uulib documentation: |
400 | Functions found in this module but not documented in the uulib documentation: |
393 | |
401 | |
394 | =over 4 |
402 | =over |
395 | |
403 | |
396 | =item $msg = straction ACT_xxx |
404 | =item $msg = straction ACT_xxx |
397 | |
405 | |
398 | Return a human readable string representing the given action code. |
406 | Return a human readable string representing the given action code. |
399 | |
407 | |
… | |
… | |
440 | |
448 | |
441 | =back |
449 | =back |
442 | |
450 | |
443 | =head1 LARGE EXAMPLE DECODER |
451 | =head1 LARGE EXAMPLE DECODER |
444 | |
452 | |
|
|
453 | The general workflow for decoding is like this: |
|
|
454 | |
|
|
455 | =over |
|
|
456 | |
|
|
457 | =item 1. Configure options with C<SetOption> or C<SetXXXCallback>. |
|
|
458 | |
|
|
459 | =item 2. Load all source files with C<LoadFile>. |
|
|
460 | |
|
|
461 | =item 3. Optionally C<Smerge>. |
|
|
462 | |
|
|
463 | =item 4. Iterate over all C<GetFileList> items (i.e. result files). |
|
|
464 | |
|
|
465 | =item 5. C<CleanUp> to delete files and free items. |
|
|
466 | |
|
|
467 | =back |
|
|
468 | |
445 | This is the file C<example-decoder> from the distribution, put here |
469 | What follows is the file C<example-decoder> from the distribution that |
446 | instead of more thorough documentation. |
470 | illustrates the above workflow in a non-trivial example. |
447 | |
471 | |
448 | #!/usr/bin/perl |
472 | #!/usr/bin/perl |
449 | |
473 | |
450 | # decode all the files in the directory uusrc/ and copy |
474 | # decode all the files in the directory uusrc/ and copy |
451 | # the resulting files to uudst/ |
475 | # the resulting files to uudst/ |
… | |
… | |
470 | SetOption OPT_RBUF, 128*1024; |
494 | SetOption OPT_RBUF, 128*1024; |
471 | SetOption OPT_WBUF, 1024*1024; |
495 | SetOption OPT_WBUF, 1024*1024; |
472 | SetOption OPT_IGNMODE, 1; |
496 | SetOption OPT_IGNMODE, 1; |
473 | SetOption OPT_IGNMODE, 1; |
497 | SetOption OPT_IGNMODE, 1; |
474 | SetOption OPT_VERBOSE, 1; |
498 | SetOption OPT_VERBOSE, 1; |
|
|
499 | SetOption OPT_AUTOCHK, 0; |
475 | |
500 | |
476 | # show the three ways you can set callback functions. I normally |
501 | # show the three ways you can set callback functions. I normally |
477 | # prefer the one with the sub inplace. |
502 | # prefer the one with the sub inplace. |
478 | SetFNameFilter \&namefilter; |
503 | SetFNameFilter \&namefilter; |
479 | |
504 | |
… | |
… | |
513 | # otherwise just pass what we have |
538 | # otherwise just pass what we have |
514 | () |
539 | () |
515 | }; |
540 | }; |
516 | |
541 | |
517 | # now read all files in the directory uusrc/* |
542 | # now read all files in the directory uusrc/* |
518 | for(<uusrc/*>) { |
543 | for (<uusrc/*>) { |
519 | my ($retval, $count) = LoadFile ($_, $_, 1); |
544 | my ($retval, $count) = LoadFile ($_, $_, 1); |
520 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
545 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
521 | } |
546 | } |
522 | |
547 | |
|
|
548 | Smerge -1; |
|
|
549 | |
523 | SetOption OPT_SAVEPATH, "uudst/"; |
550 | SetOption OPT_SAVEPATH, "uudst/"; |
524 | |
551 | |
525 | # now wade through all files and their source parts |
552 | # now wade through all files and their source parts |
526 | $i = 0; |
553 | for my $uu (GetFileList) { |
527 | while ($uu = GetFileListItem $i) { |
554 | print "file ", $uu->filename, "\n"; |
528 | $i++; |
|
|
529 | print "file nr. $i"; |
|
|
530 | print " state ", $uu->state; |
555 | print " state ", $uu->state, "\n"; |
531 | print " mode ", $uu->mode; |
556 | print " mode ", $uu->mode, "\n"; |
532 | print " uudet ", strencoding $uu->uudet; |
557 | print " uudet ", strencoding $uu->uudet, "\n"; |
533 | print " size ", $uu->size; |
558 | print " size ", $uu->size, "\n"; |
534 | print " filename ", $uu->filename; |
|
|
535 | print " subfname ", $uu->subfname; |
559 | print " subfname ", $uu->subfname, "\n"; |
536 | print " mimeid ", $uu->mimeid; |
560 | print " mimeid ", $uu->mimeid, "\n"; |
537 | print " mimetype ", $uu->mimetype; |
561 | print " mimetype ", $uu->mimetype, "\n"; |
538 | print "\n"; |
|
|
539 | |
562 | |
540 | # print additional info about all parts |
563 | # print additional info about all parts |
|
|
564 | print " parts"; |
541 | for ($uu->parts) { |
565 | for ($uu->parts) { |
542 | while (my ($k, $v) = each %$_) { |
566 | for my $k (sort keys %$_) { |
543 | print "$k > $v, "; |
567 | print " $k=$_->{$k}"; |
544 | } |
568 | } |
545 | print "\n"; |
569 | print "\n"; |
546 | } |
570 | } |
547 | |
571 | |
548 | print $uu->filename; |
|
|
549 | |
|
|
550 | $uu->remove_temp; |
572 | $uu->remove_temp; |
551 | |
573 | |
552 | if (my $err = $uu->decode ()) { |
574 | if (my $err = $uu->decode) { |
553 | print ", ", strerror $err, "\n"; |
575 | print " ERROR ", strerror $err, "\n"; |
554 | } else { |
576 | } else { |
555 | print ", saved as uudst/", $uu->filename, "\n"; |
577 | print " successfully saved as uudst/", $uu->filename, "\n"; |
556 | } |
578 | } |
557 | } |
579 | } |
558 | |
580 | |
559 | print "cleanup...\n"; |
581 | print "cleanup...\n"; |
560 | |
582 | |
561 | CleanUp; |
583 | CleanUp; |
|
|
584 | |
|
|
585 | =head1 PERLMULTICORE SUPPORT |
|
|
586 | |
|
|
587 | This module supports the perlmulticore standard (see |
|
|
588 | L<http://perlmulticore.schmorp.de/> for more info) for the following |
|
|
589 | functions - generally these are functions accessing the disk and/or using |
|
|
590 | considerable CPU time: |
|
|
591 | |
|
|
592 | LoadFile |
|
|
593 | $item->decode |
|
|
594 | $item->decode_temp |
|
|
595 | $item->remove_temp |
|
|
596 | $item->info |
|
|
597 | |
|
|
598 | The perl interpreter will be reacquired/released on every callback |
|
|
599 | invocation, so for performance reasons, callbacks should be avoided if |
|
|
600 | that is costly. |
|
|
601 | |
|
|
602 | Future versions might enable multicore support for more functions. |
|
|
603 | |
|
|
604 | =head1 BUGS AND LIMITATIONS |
|
|
605 | |
|
|
606 | The original uulib library this module uses was written at a time where |
|
|
607 | main memory was measured in megabytes and buffer overflows as a security |
|
|
608 | thing didn't exist. While a lot of security fixes have been applied over |
|
|
609 | the years (including some defense-in-depth mechanisms that can shield |
|
|
610 | against a lot of as-of-yet undetected bugs), using this library for |
|
|
611 | security purposes requires care. |
|
|
612 | |
|
|
613 | Likewise, file sizes when the uulib library was written were tiny compared |
|
|
614 | to today, so do not expect this library to handle files larger than 2GB. |
|
|
615 | |
|
|
616 | Lastly, this module uses a very "C-like" interface, which means it doesn't |
|
|
617 | protect you from invalid pointers as you might expect from "more perlish" |
|
|
618 | modules - for example, accessing a file item object after calling |
|
|
619 | C<CleanUp> will likely result in crashes, memory corruption, or worse. |
562 | |
620 | |
563 | =head1 AUTHOR |
621 | =head1 AUTHOR |
564 | |
622 | |
565 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
623 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
566 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
624 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
567 | bugfixed by Marc Lehmann. |
625 | bugfixed by Marc Lehmann. |
568 | |
626 | |
569 | =head1 SEE ALSO |
627 | =head1 SEE ALSO |
570 | |
628 | |
571 | perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. |
629 | perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>. |
572 | |
630 | |
573 | =cut |
631 | =cut |
|
|
632 | |