ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.23 by root, Thu Mar 3 17:13:33 2005 UTC vs.
Revision 1.53 by root, Sat Dec 12 02:03:14 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3use common::sense;
2 4
3use Carp; 5use Carp;
4 6
5require Exporter; 7require Exporter;
6require DynaLoader; 8require DynaLoader;
7 9
8$VERSION = "1.051"; 10our $VERSION = 1.71;
9 11
10@ISA = qw(Exporter DynaLoader); 12our @ISA = qw(Exporter DynaLoader);
11 13
12@_consts = qw( 14our @_consts = qw(
13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 15 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14 16
15 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 17 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 18 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17 19
18 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 20 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
19 21
22 OPT_RBUF OPT_WBUF
20 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 23 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
21 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 24 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
22 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 25 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
23 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 26 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
24 27
25 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 28 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
26 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 29 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
27 30
28 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 31 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
29 XX_ENCODED UU_ENCODED YENC_ENCODED 32 XX_ENCODED UU_ENCODED YENC_ENCODED
30); 33);
31 34
32@_funcs = qw( 35our @_funcs = qw(
33 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 36 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
34 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 37 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
35 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 38 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
36 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 39 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
37 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 40 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
38 E_PrepPartial 41 E_PrepPartial
39 42
40 straction strencoding strmsglevel 43 straction strencoding strmsglevel
41); 44);
42 45
43@EXPORT = @_consts; 46our @EXPORT = @_consts;
44@EXPORT_OK = @_funcs; 47our @EXPORT_OK = @_funcs;
45%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 48our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
46 49
47bootstrap Convert::UUlib $VERSION; 50bootstrap Convert::UUlib $VERSION;
48 51
49Initialize(); 52# dummy function for compatibility with pre-1.7 versions
50 53sub Initialize { }
51# not when < 5.005_6x
52# END { CleanUp() }
53
54for (@_consts) {
55 my $constant = constant($_);
56 *$_ = sub () { $constant };
57}
58 54
59# action code -> string mapping 55# action code -> string mapping
60sub straction($) { 56sub straction($) {
61 return 'copying' if $_[0] == &ACT_COPYING; 57 return 'copying' if $_[0] == &ACT_COPYING;
62 return 'decoding' if $_[0] == &ACT_DECODING; 58 return 'decoding' if $_[0] == &ACT_DECODING;
100 use Convert::UUlib ':all'; 96 use Convert::UUlib ':all';
101 97
102 # read all the files named on the commandline and decode them 98 # read all the files named on the commandline and decode them
103 # into the CURRENT directory. See below for a longer example. 99 # into the CURRENT directory. See below for a longer example.
104 LoadFile $_ for @ARGV; 100 LoadFile $_ for @ARGV;
105 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 101
102 for my $uu (GetFileList) {
106 if ($uu->state & FILE_OK) { 103 if ($uu->state & FILE_OK) {
107 $uu->decode; 104 $uu->decode;
108 print $uu->filename, "\n"; 105 print $uu->filename, "\n";
109 } 106 }
110 } 107 }
154 OPT_TINYB64 detect short B64 outside of Mime 151 OPT_TINYB64 detect short B64 outside of Mime
155 OPT_ENCEXT extension for single-part encoded files 152 OPT_ENCEXT extension for single-part encoded files
156 OPT_REMOVE remove input files after decoding (dangerous) 153 OPT_REMOVE remove input files after decoding (dangerous)
157 OPT_MOREMIME strict MIME adherence 154 OPT_MOREMIME strict MIME adherence
158 OPT_DOTDOT ".."-unescaping has not yet been done on input files 155 OPT_DOTDOT ".."-unescaping has not yet been done on input files
156 OPT_RBUF set default read I/O buffer size in bytes
157 OPT_WBUF set default write I/O buffer size in bytes
158 OPT_AUTOCHECK automatically check file list after every loadfile
159 159
160=head2 Result/Error codes 160=head2 Result/Error codes
161 161
162 RET_OK everything went fine 162 RET_OK everything went fine
163 RET_IOERR I/O Error - examine errno 163 RET_IOERR I/O Error - examine errno
206again. 206again.
207 207
208On my machine, a fairly complete decode with DBI backend needs about 10MB 208On my machine, a fairly complete decode with DBI backend needs about 10MB
209RSS to decode 20000 files. 209RSS to decode 20000 files.
210 210
211=over 4 211=over
212
213=item Initialize
214
215Not normally necessary, (re-)initializes the library.
216 212
217=item CleanUp 213=item CleanUp
218 214
219Not normally necessary, could be called at the end to release memory 215Release memory, file items and clean up files. Should be called after a
220before starting a new decoding round. 216decoding run, if you want to start a new one.
221 217
222=back 218=back
223 219
224=head2 Setting and querying options 220=head2 Setting and querying options
225 221
226=over 4 222=over
227 223
228=item $option = GetOption OPT_xxx 224=item $option = GetOption OPT_xxx
229 225
230=item SetOption OPT_xxx, opt-value 226=item SetOption OPT_xxx, opt-value
231 227
233 229
234See the C<OPT_xxx> constants above to see which options exist. 230See the C<OPT_xxx> constants above to see which options exist.
235 231
236=head2 Setting various callbacks 232=head2 Setting various callbacks
237 233
238=over 4 234=over
239 235
240=item SetMsgCallback [callback-function] 236=item SetMsgCallback [callback-function]
241 237
242=item SetBusyCallback [callback-function] 238=item SetBusyCallback [callback-function]
243 239
247 243
248=back 244=back
249 245
250=head2 Call the currently selected FNameFilter 246=head2 Call the currently selected FNameFilter
251 247
252=over 4 248=over
253 249
254=item $file = FNameFilter $file 250=item $file = FNameFilter $file
255 251
256=back 252=back
257 253
258=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 254=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
259 255
260=over 4 256=over
261 257
262=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] 258=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
263 259
264Load the given file and scan it for encoded contents. Optionally tag it 260Load the given file and scan it for encoded contents. Optionally tag it
265with the given id, and if C<$delflag> is true, delete the file after it 261with the given id, and if C<$delflag> is true, delete the file after it
274If you are desperate, try to call C<Smerge> with increasing C<$pass> 270If you are desperate, try to call C<Smerge> with increasing C<$pass>
275values, beginning at C<0>, to try to merge parts that usually would not 271values, beginning at C<0>, to try to merge parts that usually would not
276have been merged. 272have been merged.
277 273
278Most probably this will result in garbled files, so never do this by 274Most probably this will result in garbled files, so never do this by
279default. 275default, except:
276
277If the C<OPT_AUTOCHECK> option has been disabled (by default it is
278enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
279after loading all files as an additional pre-pass (which is normally done
280by C<LoadFile>).
280 281
281=item $item = GetFileListItem $item_number 282=item $item = GetFileListItem $item_number
282 283
283Return the C<$item> structure for the C<$item_number>'th found file, or 284Return the C<$item> structure for the C<$item_number>'th found file, or
284C<undef> if no file with that number exists. 285C<undef> if no file with that number exists.
285 286
286The first file has number C<0>, and the series has no holes, so you can 287The first file has number C<0>, and the series has no holes, so you can
287iterate over all files by starting with zero and incrementing until you 288iterate over all files by starting with zero and incrementing until you
288hit C<undef>. 289hit C<undef>.
289 290
291This function has to walk the linear list of files on each access, so
292if you want to iterate over all items, it is usually faster to use
293C<GetFileList>.
294
295=item @items = GetFileList
296
297Similar to C<GetFileListItem>, but returns all files in one go.
298
290=back 299=back
291 300
292=head2 Decoding files 301=head2 Decoding files
293 302
294=over 4 303=over
295 304
296=item $retval = $item->rename($newname) 305=item $retval = $item->rename ($newname)
297 306
298Change the ondisk filename where the decoded file will be saved. 307Change the ondisk filename where the decoded file will be saved.
299 308
300=item $retval = $item->decode_temp 309=item $retval = $item->decode_temp
301 310
304 313
305=item $retval = $item->remove_temp 314=item $retval = $item->remove_temp
306 315
307Remove the temporarily decoded file again. 316Remove the temporarily decoded file again.
308 317
309=item $retval = $item->decode([$target_path]) 318=item $retval = $item->decode ([$target_path])
310 319
311Decode the file to it's destination, or the given target path. 320Decode the file to its destination, or the given target path.
312 321
313=item $retval = $item->info(callback-function) 322=item $retval = $item->info (callback-function)
314 323
315=back 324=back
316 325
317=head2 Querying (and setting) item attributes 326=head2 Querying (and setting) item attributes
318 327
319=over 4 328=over
320 329
321=item $state = $item->state 330=item $state = $item->state
322 331
323=item $mode = $item->mode([newmode]) 332=item $mode = $item->mode ([newmode])
324 333
325=item $uudet = $item->uudet 334=item $uudet = $item->uudet
326 335
327=item $size = $item->size 336=item $size = $item->size
328 337
329=item $filename = $item->filename([newfilename]) 338=item $filename = $item->filename ([newfilename])
330 339
331=item $subfname = $item->subfname 340=item $subfname = $item->subfname
332 341
333=item $mimeid = $item->mimeid 342=item $mimeid = $item->mimeid
334 343
338 347
339=back 348=back
340 349
341=head2 Information about source parts 350=head2 Information about source parts
342 351
343=over 4 352=over
344 353
345=item $parts = $item->parts 354=item $parts = $item->parts
346 355
347Return information about all parts (source files) used to decode the file 356Return information about all parts (source files) used to decode the file
348as a list of hashrefs with the following structure: 357as a list of hashrefs with the following structure:
362Usually you are interested mostly in the C<sfname> and possibly the C<partno> 371Usually you are interested mostly in the C<sfname> and possibly the C<partno>
363and C<filename> members. 372and C<filename> members.
364 373
365=back 374=back
366 375
367=head2 Functions below not documented and not very well tested 376=head2 Functions below are not documented and not very well tested - feedback welcome
368 377
369 QuickDecode 378 QuickDecode
370 EncodeMulti 379 EncodeMulti
371 EncodePartial 380 EncodePartial
372 EncodeToStream 381 EncodeToStream
376 385
377=head2 EXTENSION FUNCTIONS 386=head2 EXTENSION FUNCTIONS
378 387
379Functions found in this module but not documented in the uulib documentation: 388Functions found in this module but not documented in the uulib documentation:
380 389
381=over 4 390=over
382 391
383=item $msg = straction ACT_xxx 392=item $msg = straction ACT_xxx
384 393
385Return a human readable string representing the given action code. 394Return a human readable string representing the given action code.
386 395
427 436
428=back 437=back
429 438
430=head1 LARGE EXAMPLE DECODER 439=head1 LARGE EXAMPLE DECODER
431 440
441The general workflow for decoding is like this:
442
443=over
444
445=item 1. Configure options with C<SetOption> or C<SetXXXCallback>.
446
447=item 2. Load all source files with C<LoadFile>.
448
449=item 3. Optionally C<Smerge>.
450
451=item 4. Iterate over all C<GetFileList> items (i.e. result files).
452
453=item 5. C<CleanUp> to delete files and free items.
454
455=back
456
432This is the file C<example-decoder> from the distribution, put here 457What follows is the file C<example-decoder> from the distribution that
433instead of more thorough documentation. 458illustrates the above workflow in a non-trivial example.
434 459
460 #!/usr/bin/perl
461
435 # decode all the files in the directory uusrc/ and copy 462 # decode all the files in the directory uusrc/ and copy
436 # the resulting files to uudst/ 463 # the resulting files to uudst/
437 464
438 use Convert::UUlib ':all'; 465 use Convert::UUlib ':all';
439 466
440 sub namefilter { 467 sub namefilter {
441 my($path)=@_; 468 my ($path) = @_;
469
442 $path=~s/^.*[\/\\]//; 470 $path=~s/^.*[\/\\]//;
471
443 $path; 472 $path
444 } 473 }
445 474
446 sub busycb { 475 sub busycb {
447 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 476 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
448 $_[0]=straction($action); 477 $_[0]=straction($action);
449 print "busy_callback(", (join ",",@_), ")\n"; 478 print "busy_callback(", (join ",",@_), ")\n";
450 0; 479 0
451 } 480 }
452 481
482 SetOption OPT_RBUF, 128*1024;
483 SetOption OPT_WBUF, 1024*1024;
453 SetOption OPT_IGNMODE, 1; 484 SetOption OPT_IGNMODE, 1;
485 SetOption OPT_IGNMODE, 1;
454 SetOption OPT_VERBOSE, 1; 486 SetOption OPT_VERBOSE, 1;
487 SetOption OPT_AUTOCHECK, 0;
455 488
456 # show the three ways you can set callback functions. I normally 489 # show the three ways you can set callback functions. I normally
457 # prefer the one with the sub inplace. 490 # prefer the one with the sub inplace.
458 SetFNameFilter \&namefilter; 491 SetFNameFilter \&namefilter;
459 492
460 SetBusyCallback "busycb", 333; 493 SetBusyCallback "busycb", 333;
461 494
462 SetMsgCallback sub { 495 SetMsgCallback sub {
463 my ($msg, $level) = @_; 496 my ($msg, $level) = @_;
464 print uc strmsglevel $_[1], ": $msg\n"; 497 print uc strmsglevel $_[1], ": $msg\n";
465 }; 498 };
466 499
467 # the following non-trivial FileNameCallback takes care 500 # the following non-trivial FileNameCallback takes care
468 # of some subject lines not detected properly by uulib: 501 # of some subject lines not detected properly by uulib:
469 SetFileNameCallback sub { 502 SetFileNameCallback sub {
470 return unless $_[1]; # skip "Re:"-plies et al. 503 return unless $_[1]; # skip "Re:"-plies et al.
471 local $_ = $_[0]; 504 local $_ = $_[0];
472 505
473 # the following rules are rather effective on some newsgroups, 506 # the following rules are rather effective on some newsgroups,
474 # like alt.binaries.games.anime, where non-mime, uuencoded data 507 # like alt.binaries.games.anime, where non-mime, uuencoded data
475 # is very common 508 # is very common
476 509
477 # if we find some *.rar, take it as the filename 510 # if we find some *.rar, take it as the filename
478 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 511 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
479 512
480 # one common subject format 513 # one common subject format
481 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 514 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
482 515
483 # - filename.par (04/55) 516 # - filename.par (04/55)
484 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 517 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
485 518
486 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 519 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
487 # - (20 files) No.17 Roseanne.jpg [2/2] 520 # - (20 files) No.17 Roseanne.jpg [2/2]
488 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 521 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
489 522
523 # try to detect some common forms of filenames
524 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
525
490 # otherwise just pass what we have 526 # otherwise just pass what we have
491 return (); 527 ()
492 }; 528 };
493 529
494 # now read all files in the directory uusrc/* 530 # now read all files in the directory uusrc/*
495 for(<uusrc/*>) { 531 for (<uusrc/*>) {
496 my($retval,$count)=LoadFile ($_, $_, 1); 532 my ($retval, $count) = LoadFile ($_, $_, 1);
497 print "file($_), status(", strerror $retval, ") parts($count)\n"; 533 print "file($_), status(", strerror $retval, ") parts($count)\n";
498 } 534 }
499 535
536 Smerge -1;
537
500 SetOption OPT_SAVEPATH, "uudst/"; 538 SetOption OPT_SAVEPATH, "uudst/";
501 539
502 # now wade through all files and their source parts 540 # now wade through all files and their source parts
503 $i = 0; 541 for my $uu (GetFileList) {
504 while ($uu = GetFileListItem($i)) { 542 print "file ", $uu->filename, "\n";
505 $i++;
506 print "file nr. $i";
507 print " state ", $uu->state; 543 print " state ", $uu->state, "\n";
508 print " mode ", $uu->mode; 544 print " mode ", $uu->mode, "\n";
509 print " uudet ", strencoding $uu->uudet; 545 print " uudet ", strencoding $uu->uudet, "\n";
510 print " size ", $uu->size; 546 print " size ", $uu->size, "\n";
511 print " filename ", $uu->filename;
512 print " subfname ", $uu->subfname; 547 print " subfname ", $uu->subfname, "\n";
513 print " mimeid ", $uu->mimeid; 548 print " mimeid ", $uu->mimeid, "\n";
514 print " mimetype ", $uu->mimetype; 549 print " mimetype ", $uu->mimetype, "\n";
515 print "\n";
516 550
517 # print additional info about all parts 551 # print additional info about all parts
552 print " parts";
518 for ($uu->parts) { 553 for ($uu->parts) {
519 while (my ($k, $v) = each %$_) { 554 for my $k (sort keys %$_) {
520 print "$k > $v, "; 555 print " $k=$_->{$k}";
521 } 556 }
522 print "\n"; 557 print "\n";
523 } 558 }
524 559
525 $uu->decode_temp;
526 print " temporarily decoded to ", $uu->binfile, "\n";
527 $uu->remove_temp; 560 $uu->remove_temp;
528 561
529 print strerror $uu->decode; 562 if (my $err = $uu->decode) {
563 print " ERROR ", strerror $err, "\n";
564 } else {
530 print " saved as uudst/", $uu->filename, "\n"; 565 print " successfully saved as uudst/", $uu->filename, "\n";
531 } 566 }
567 }
532 568
533 print "cleanup...\n"; 569 print "cleanup...\n";
534 570
535 CleanUp(); 571 CleanUp;
572
573=head1 PERLMULTICORE SUPPORT
574
575This module supports the perlmulticore standard (see
576L<http://perlmulticore.schmorp.de/> for more info) for the following
577functions - generally these are functions accessing the disk and/or using
578considerable CPU time:
579
580 LoadFile
581 $item->decode
582 $item->decode_temp
583 $item->remove_temp
584 $item->info
585
586The perl interpreter will be reacquired/released on every callback
587invocation, so for performance reasons, callbacks should be avoided if
588that is costly.
589
590Future versions might enable multicore support for more functions.
591
592=head1 BUGS AND LIMITATIONS
593
594The original uulib library this module uses was written at a time when
595main memory was measured in megabytes and buffer overflows as a security
596thing didn't exist. While a lot of security fixes have been applied over
597the years (including some defense in depth mechanisms that can shield
598against a lot of as-of-yet undetected bugs), using this library for
599security purposes requires care.
600
601Likewise, file sizes when the uulib library was written were tiny compared
602to today, so do not expect this library to handle files larger than 2GB.
603
604Lastly, this module uses a very "C-like" interface, which means it doesn't
605protect you from invalid pointers as you might expect from "more perlish"
606modules - for example, accessing a file item object after calling
607C<CleanUp> will likely result in crashes, memory corruption, or worse.
536 608
537=head1 AUTHOR 609=head1 AUTHOR
538 610
539Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written 611Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
540by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 612by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
541bugfixed by Marc Lehmann. 613bugfixed by Marc Lehmann.
542 614
543=head1 SEE ALSO 615=head1 SEE ALSO
544 616
545perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 617perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
546 618
547=cut 619=cut
620

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines