ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.28 by root, Sun Mar 25 00:18:07 2007 UTC vs.
Revision 1.48 by root, Fri Feb 28 17:19:02 2020 UTC

6use Carp; 6use Carp;
7 7
8require Exporter; 8require Exporter;
9require DynaLoader; 9require DynaLoader;
10 10
11our $VERSION = '1.08'; 11our $VERSION = 1.62;
12 12
13our @ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
14 14
15our @_consts = qw( 15our @_consts = qw(
16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
22 22
23 OPT_RBUF OPT_WBUF 23 OPT_RBUF OPT_WBUF
24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
28 28
29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
31 31
32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
34); 34);
35 35
36our @_funcs = qw( 36our @_funcs = qw(
37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
39 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
42 E_PrepPartial 42 E_PrepPartial
43 43
44 straction strencoding strmsglevel 44 straction strencoding strmsglevel
48our @EXPORT_OK = @_funcs; 48our @EXPORT_OK = @_funcs;
49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
50 50
51bootstrap Convert::UUlib $VERSION; 51bootstrap Convert::UUlib $VERSION;
52 52
53Initialize(); 53# dummy function for compatibility with pre-1.7 versions
54 54sub Initialize { }
55# not when < 5.005_6x
56# END { CleanUp() }
57 55
58for (@_consts) { 56for (@_consts) {
59 my $constant = constant($_); 57 my $constant = constant ($_);
60 no strict 'refs'; 58 no strict 'refs';
61 *$_ = sub () { $constant }; 59 *$_ = sub () { $constant };
62} 60}
63 61
64# action code -> string mapping 62# action code -> string mapping
105 use Convert::UUlib ':all'; 103 use Convert::UUlib ':all';
106 104
107 # read all the files named on the commandline and decode them 105 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example. 106 # into the CURRENT directory. See below for a longer example.
109 LoadFile $_ for @ARGV; 107 LoadFile $_ for @ARGV;
110 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 108
109 for my $uu (GetFileList) {
111 if ($uu->state & FILE_OK) { 110 if ($uu->state & FILE_OK) {
112 $uu->decode; 111 $uu->decode;
113 print $uu->filename, "\n"; 112 print $uu->filename, "\n";
114 } 113 }
115 } 114 }
159 OPT_TINYB64 detect short B64 outside of Mime 158 OPT_TINYB64 detect short B64 outside of Mime
160 OPT_ENCEXT extension for single-part encoded files 159 OPT_ENCEXT extension for single-part encoded files
161 OPT_REMOVE remove input files after decoding (dangerous) 160 OPT_REMOVE remove input files after decoding (dangerous)
162 OPT_MOREMIME strict MIME adherence 161 OPT_MOREMIME strict MIME adherence
163 OPT_DOTDOT ".."-unescaping has not yet been done on input files 162 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL* 163 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL* 164 OPT_WBUF set default write I/O buffer size in bytes
165 OPT_AUTOCHECK automatically check file list after every loadfile
166 166
167=head2 Result/Error codes 167=head2 Result/Error codes
168 168
169 RET_OK everything went fine 169 RET_OK everything went fine
170 RET_IOERR I/O Error - examine errno 170 RET_IOERR I/O Error - examine errno
213again. 213again.
214 214
215On my machine, a fairly complete decode with DBI backend needs about 10MB 215On my machine, a fairly complete decode with DBI backend needs about 10MB
216RSS to decode 20000 files. 216RSS to decode 20000 files.
217 217
218=over 4 218=over
219
220=item Initialize
221
222Not normally necessary, (re-)initializes the library.
223 219
224=item CleanUp 220=item CleanUp
225 221
226Not normally necessary, could be called at the end to release memory 222Release memory, file items and clean up files. Should be called after a
227before starting a new decoding round. 223decoding run, if you want to start a new one.
228 224
229=back 225=back
230 226
231=head2 Setting and querying options 227=head2 Setting and querying options
232 228
233=over 4 229=over
234 230
235=item $option = GetOption OPT_xxx 231=item $option = GetOption OPT_xxx
236 232
237=item SetOption OPT_xxx, opt-value 233=item SetOption OPT_xxx, opt-value
238 234
240 236
241See the C<OPT_xxx> constants above to see which options exist. 237See the C<OPT_xxx> constants above to see which options exist.
242 238
243=head2 Setting various callbacks 239=head2 Setting various callbacks
244 240
245=over 4 241=over
246 242
247=item SetMsgCallback [callback-function] 243=item SetMsgCallback [callback-function]
248 244
249=item SetBusyCallback [callback-function] 245=item SetBusyCallback [callback-function]
250 246
254 250
255=back 251=back
256 252
257=head2 Call the currently selected FNameFilter 253=head2 Call the currently selected FNameFilter
258 254
259=over 4 255=over
260 256
261=item $file = FNameFilter $file 257=item $file = FNameFilter $file
262 258
263=back 259=back
264 260
265=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 261=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
266 262
267=over 4 263=over
268 264
269=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] 265=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
270 266
271Load the given file and scan it for encoded contents. Optionally tag it 267Load the given file and scan it for encoded contents. Optionally tag it
272with the given id, and if C<$delflag> is true, delete the file after it 268with the given id, and if C<$delflag> is true, delete the file after it
281If you are desperate, try to call C<Smerge> with increasing C<$pass> 277If you are desperate, try to call C<Smerge> with increasing C<$pass>
282values, beginning at C<0>, to try to merge parts that usually would not 278values, beginning at C<0>, to try to merge parts that usually would not
283have been merged. 279have been merged.
284 280
285Most probably this will result in garbled files, so never do this by 281Most probably this will result in garbled files, so never do this by
286default. 282default, except:
283
284If the C<OPT_AUTOCHECK> option has been disabled (by default it is
285enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
286after loading all files as an additional pre-pass (which is normally done
287by C<LoadFile>).
287 288
288=item $item = GetFileListItem $item_number 289=item $item = GetFileListItem $item_number
289 290
290Return the C<$item> structure for the C<$item_number>'th found file, or 291Return the C<$item> structure for the C<$item_number>'th found file, or
291C<undef> of no file with that number exists. 292C<undef> of no file with that number exists.
292 293
293The first file has number C<0>, and the series has no holes, so you can 294The first file has number C<0>, and the series has no holes, so you can
294iterate over all files by starting with zero and incrementing until you 295iterate over all files by starting with zero and incrementing until you
295hit C<undef>. 296hit C<undef>.
296 297
298This function has to walk the linear list of files on each access, so
299if you want to iterate over all items, it is usually faster to use
300C<GetFileList>.
301
302=item @items = GetFileList
303
304Similar to C<GetFileListItem>, but returns all files in one go.
305
297=back 306=back
298 307
299=head2 Decoding files 308=head2 Decoding files
300 309
301=over 4 310=over
302 311
303=item $retval = $item->rename($newname) 312=item $retval = $item->rename ($newname)
304 313
305Change the ondisk filename where the decoded file will be saved. 314Change the ondisk filename where the decoded file will be saved.
306 315
307=item $retval = $item->decode_temp 316=item $retval = $item->decode_temp
308 317
311 320
312=item $retval = $item->remove_temp 321=item $retval = $item->remove_temp
313 322
314Remove the temporarily decoded file again. 323Remove the temporarily decoded file again.
315 324
316=item $retval = $item->decode([$target_path]) 325=item $retval = $item->decode ([$target_path])
317 326
318Decode the file to it's destination, or the given target path. 327Decode the file to its destination, or the given target path.
319 328
320=item $retval = $item->info(callback-function) 329=item $retval = $item->info (callback-function)
321 330
322=back 331=back
323 332
324=head2 Querying (and setting) item attributes 333=head2 Querying (and setting) item attributes
325 334
326=over 4 335=over
327 336
328=item $state = $item->state 337=item $state = $item->state
329 338
330=item $mode = $item->mode([newmode]) 339=item $mode = $item->mode ([newmode])
331 340
332=item $uudet = $item->uudet 341=item $uudet = $item->uudet
333 342
334=item $size = $item->size 343=item $size = $item->size
335 344
336=item $filename = $item->filename([newfilename}) 345=item $filename = $item->filename ([newfilename})
337 346
338=item $subfname = $item->subfname 347=item $subfname = $item->subfname
339 348
340=item $mimeid = $item->mimeid 349=item $mimeid = $item->mimeid
341 350
345 354
346=back 355=back
347 356
348=head2 Information about source parts 357=head2 Information about source parts
349 358
350=over 4 359=over
351 360
352=item $parts = $item->parts 361=item $parts = $item->parts
353 362
354Return information about all parts (source files) used to decode the file 363Return information about all parts (source files) used to decode the file
355as a list of hashrefs with the following structure: 364as a list of hashrefs with the following structure:
369Usually you are interested mostly the C<sfname> and possibly the C<partno> 378Usually you are interested mostly the C<sfname> and possibly the C<partno>
370and C<filename> members. 379and C<filename> members.
371 380
372=back 381=back
373 382
374=head2 Functions below not documented and not very well tested 383=head2 Functions below are not documented and not very well tested - feedback welcome
375 384
376 QuickDecode 385 QuickDecode
377 EncodeMulti 386 EncodeMulti
378 EncodePartial 387 EncodePartial
379 EncodeToStream 388 EncodeToStream
383 392
384=head2 EXTENSION FUNCTIONS 393=head2 EXTENSION FUNCTIONS
385 394
386Functions found in this module but not documented in the uulib documentation: 395Functions found in this module but not documented in the uulib documentation:
387 396
388=over 4 397=over
389 398
390=item $msg = straction ACT_xxx 399=item $msg = straction ACT_xxx
391 400
392Return a human readable string representing the given action code. 401Return a human readable string representing the given action code.
393 402
434 443
435=back 444=back
436 445
437=head1 LARGE EXAMPLE DECODER 446=head1 LARGE EXAMPLE DECODER
438 447
448The general workflow for decoding is like this:
449
450=over
451
452=item 1. Configure options with C<SetOption> or C<SetXXXCallback>.
453
454=item 2. Load all source files with C<LoadFile>.
455
456=item 3. Optionally C<Smerge>.
457
458=item 4. Iterate over all C<GetFileList> items (i.e. result files).
459
460=item 5. C<CleanUp> to delete files and free items.
461
462=back
463
439This is the file C<example-decoder> from the distribution, put here 464What follows is the file C<example-decoder> from the distribution that
440instead of more thorough documentation. 465illustrates the above workflow in a non-trivial example.
441 466
467 #!/usr/bin/perl
468
442 # decode all the files in the directory uusrc/ and copy 469 # decode all the files in the directory uusrc/ and copy
443 # the resulting files to uudst/ 470 # the resulting files to uudst/
444 471
445 use Convert::UUlib ':all'; 472 use Convert::UUlib ':all';
446 473
447 sub namefilter { 474 sub namefilter {
448 my($path)=@_; 475 my ($path) = @_;
476
449 $path=~s/^.*[\/\\]//; 477 $path=~s/^.*[\/\\]//;
478
450 $path; 479 $path
451 } 480 }
452 481
453 sub busycb { 482 sub busycb {
454 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 483 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
455 $_[0]=straction($action); 484 $_[0]=straction($action);
456 print "busy_callback(", (join ",",@_), ")\n"; 485 print "busy_callback(", (join ",",@_), ")\n";
457 0; 486 0
458 } 487 }
459 488
489 SetOption OPT_RBUF, 128*1024;
490 SetOption OPT_WBUF, 1024*1024;
460 SetOption OPT_IGNMODE, 1; 491 SetOption OPT_IGNMODE, 1;
492 SetOption OPT_IGNMODE, 1;
461 SetOption OPT_VERBOSE, 1; 493 SetOption OPT_VERBOSE, 1;
462 494
463 # show the three ways you can set callback functions. I normally 495 # show the three ways you can set callback functions. I normally
464 # prefer the one with the sub inplace. 496 # prefer the one with the sub inplace.
465 SetFNameFilter \&namefilter; 497 SetFNameFilter \&namefilter;
466 498
467 SetBusyCallback "busycb", 333; 499 SetBusyCallback "busycb", 333;
468 500
469 SetMsgCallback sub { 501 SetMsgCallback sub {
470 my ($msg, $level) = @_; 502 my ($msg, $level) = @_;
471 print uc strmsglevel $_[1], ": $msg\n"; 503 print uc strmsglevel $_[1], ": $msg\n";
472 }; 504 };
473 505
474 # the following non-trivial FileNameCallback takes care 506 # the following non-trivial FileNameCallback takes care
475 # of some subject lines not detected properly by uulib: 507 # of some subject lines not detected properly by uulib:
476 SetFileNameCallback sub { 508 SetFileNameCallback sub {
477 return unless $_[1]; # skip "Re:"-plies et al. 509 return unless $_[1]; # skip "Re:"-plies et al.
478 local $_ = $_[0]; 510 local $_ = $_[0];
479 511
480 # the following rules are rather effective on some newsgroups, 512 # the following rules are rather effective on some newsgroups,
481 # like alt.binaries.games.anime, where non-mime, uuencoded data 513 # like alt.binaries.games.anime, where non-mime, uuencoded data
482 # is very common 514 # is very common
483 515
484 # if we find some *.rar, take it as the filename 516 # if we find some *.rar, take it as the filename
485 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 517 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
486 518
487 # one common subject format 519 # one common subject format
488 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 520 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
489 521
490 # - filename.par (04/55) 522 # - filename.par (04/55)
491 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 523 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
492 524
493 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 525 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
494 # - (20 files) No.17 Roseanne.jpg [2/2] 526 # - (20 files) No.17 Roseanne.jpg [2/2]
495 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 527 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
496 528
529 # try to detect some common forms of filenames
530 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
531
497 # otherwise just pass what we have 532 # otherwise just pass what we have
498 return (); 533 ()
499 }; 534 };
500 535
501 # now read all files in the directory uusrc/* 536 # now read all files in the directory uusrc/*
502 for(<uusrc/*>) { 537 for (<uusrc/*>) {
503 my($retval,$count)=LoadFile ($_, $_, 1); 538 my ($retval, $count) = LoadFile ($_, $_, 1);
504 print "file($_), status(", strerror $retval, ") parts($count)\n"; 539 print "file($_), status(", strerror $retval, ") parts($count)\n";
505 } 540 }
506 541
507 SetOption OPT_SAVEPATH, "uudst/"; 542 SetOption OPT_SAVEPATH, "uudst/";
508 543
509 # now wade through all files and their source parts 544 # now wade through all files and their source parts
510 $i = 0; 545 for my $uu (GetFileList) {
511 while ($uu = GetFileListItem($i)) { 546 print "file ", $uu->filename, "\n";
512 $i++;
513 print "file nr. $i";
514 print " state ", $uu->state; 547 print " state ", $uu->state, "\n";
515 print " mode ", $uu->mode; 548 print " mode ", $uu->mode, "\n";
516 print " uudet ", strencoding $uu->uudet; 549 print " uudet ", strencoding $uu->uudet, "\n";
517 print " size ", $uu->size; 550 print " size ", $uu->size, "\n";
518 print " filename ", $uu->filename;
519 print " subfname ", $uu->subfname; 551 print " subfname ", $uu->subfname, "\n";
520 print " mimeid ", $uu->mimeid; 552 print " mimeid ", $uu->mimeid, "\n";
521 print " mimetype ", $uu->mimetype; 553 print " mimetype ", $uu->mimetype, "\n";
522 print "\n";
523 554
524 # print additional info about all parts 555 # print additional info about all parts
556 print " parts";
525 for ($uu->parts) { 557 for ($uu->parts) {
526 while (my ($k, $v) = each %$_) { 558 for my $k (sort keys %$_) {
527 print "$k > $v, "; 559 print " $k=$_->{$k}";
528 } 560 }
529 print "\n"; 561 print "\n";
530 } 562 }
531 563
532 $uu->decode_temp;
533 print " temporarily decoded to ", $uu->binfile, "\n";
534 $uu->remove_temp; 564 $uu->remove_temp;
535 565
536 print strerror $uu->decode; 566 if (my $err = $uu->decode) {
567 print " ERROR ", strerror $err, "\n";
568 } else {
537 print " saved as uudst/", $uu->filename, "\n"; 569 print " successfully saved as uudst/", $uu->filename, "\n";
538 } 570 }
571 }
539 572
540 print "cleanup...\n"; 573 print "cleanup...\n";
541 574
542 CleanUp(); 575 CleanUp;
576
577=head1 PERLMULTICORE SUPPORT
578
579This module supports the perlmulticore standard (see
580L<http://perlmulticore.schmorp.de/> for more info) for the following
581functions - generally these are functions accessing the disk and/or using
582considerable CPU time:
583
584 LoadFile
585 $item->decode
586 $item->decode_temp
587 $item->remove_temp
588 $item->info
589
590The perl interpreter will be reacquired/released on every callback
591invocation, so for performance reasons, callbacks should be avoided if
592that is costly.
593
594Future versions might enable multicore support for more functions.
595
596=head1 BUGS AND LIMITATIONS
597
598The original uulib library this module uses was written at a time where
599main memory was measured in megabytes and buffer overflows as a security
600thing didn't exist. While a lot of security fixes have been applied over
601the years (including some defense in depth mechanisms that can shield
602against a lot of as-of-yet undetected bugs), using this library for
603security purposes requires care.
604
605Likewise, file sizes when the uulib library was written were tiny compared
606to today, so do not expect this library to handle files larger than 2GB.
543 607
544=head1 AUTHOR 608=head1 AUTHOR
545 609
546Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written 610Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
547by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 611by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
548bugfixed by Marc Lehmann. 612bugfixed by Marc Lehmann.
549 613
550=head1 SEE ALSO 614=head1 SEE ALSO
551 615
552perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 616perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
553 617
554=cut 618=cut
619

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines