ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.27 by root, Sat Dec 16 22:53:30 2006 UTC vs.
Revision 1.49 by root, Fri Feb 28 17:33:09 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3use common::sense;
2 4
3use Carp; 5use Carp;
4 6
5require Exporter; 7require Exporter;
6require DynaLoader; 8require DynaLoader;
7 9
8$VERSION = '1.08'; 10our $VERSION = 1.62;
9 11
10@ISA = qw(Exporter DynaLoader); 12our @ISA = qw(Exporter DynaLoader);
11 13
12@_consts = qw( 14our @_consts = qw(
13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 15 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14 16
15 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 17 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 18 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17 19
19 21
20 OPT_RBUF OPT_WBUF 22 OPT_RBUF OPT_WBUF
21 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 23 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 24 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 25 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 26 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
25 27
26 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 28 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
27 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 29 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
28 30
29 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 31 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 XX_ENCODED UU_ENCODED YENC_ENCODED 32 XX_ENCODED UU_ENCODED YENC_ENCODED
31); 33);
32 34
33@_funcs = qw( 35our @_funcs = qw(
34 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 36 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
35 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 37 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
36 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 38 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
37 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 39 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
38 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 40 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
39 E_PrepPartial 41 E_PrepPartial
40 42
41 straction strencoding strmsglevel 43 straction strencoding strmsglevel
42); 44);
43 45
44@EXPORT = @_consts; 46our @EXPORT = @_consts;
45@EXPORT_OK = @_funcs; 47our @EXPORT_OK = @_funcs;
46%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 48our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
47 49
48bootstrap Convert::UUlib $VERSION; 50bootstrap Convert::UUlib $VERSION;
49 51
50Initialize(); 52# dummy function for compatibility with pre-1.7 versions
51 53sub Initialize { }
52# not when < 5.005_6x
53# END { CleanUp() }
54
55for (@_consts) {
56 my $constant = constant($_);
57 *$_ = sub () { $constant };
58}
59 54
60# action code -> string mapping 55# action code -> string mapping
61sub straction($) { 56sub straction($) {
62 return 'copying' if $_[0] == &ACT_COPYING; 57 return 'copying' if $_[0] == &ACT_COPYING;
63 return 'decoding' if $_[0] == &ACT_DECODING; 58 return 'decoding' if $_[0] == &ACT_DECODING;
101 use Convert::UUlib ':all'; 96 use Convert::UUlib ':all';
102 97
103 # read all the files named on the commandline and decode them 98 # read all the files named on the commandline and decode them
104 # into the CURRENT directory. See below for a longer example. 99 # into the CURRENT directory. See below for a longer example.
105 LoadFile $_ for @ARGV; 100 LoadFile $_ for @ARGV;
106 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 101
102 for my $uu (GetFileList) {
107 if ($uu->state & FILE_OK) { 103 if ($uu->state & FILE_OK) {
108 $uu->decode; 104 $uu->decode;
109 print $uu->filename, "\n"; 105 print $uu->filename, "\n";
110 } 106 }
111 } 107 }
155 OPT_TINYB64 detect short B64 outside of Mime 151 OPT_TINYB64 detect short B64 outside of Mime
156 OPT_ENCEXT extension for single-part encoded files 152 OPT_ENCEXT extension for single-part encoded files
157 OPT_REMOVE remove input files after decoding (dangerous) 153 OPT_REMOVE remove input files after decoding (dangerous)
158 OPT_MOREMIME strict MIME adherence 154 OPT_MOREMIME strict MIME adherence
159 OPT_DOTDOT ".."-unescaping has not yet been done on input files 155 OPT_DOTDOT ".."-unescaping has not yet been done on input files
160 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL* 156 OPT_RBUF set default read I/O buffer size in bytes
161 OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL* 157 OPT_WBUF set default write I/O buffer size in bytes
158 OPT_AUTOCHECK automatically check file list after every loadfile
162 159
163=head2 Result/Error codes 160=head2 Result/Error codes
164 161
165 RET_OK everything went fine 162 RET_OK everything went fine
166 RET_IOERR I/O Error - examine errno 163 RET_IOERR I/O Error - examine errno
209again. 206again.
210 207
211On my machine, a fairly complete decode with DBI backend needs about 10MB 208On my machine, a fairly complete decode with DBI backend needs about 10MB
212RSS to decode 20000 files. 209RSS to decode 20000 files.
213 210
214=over 4 211=over
215
216=item Initialize
217
218Not normally necessary, (re-)initializes the library.
219 212
220=item CleanUp 213=item CleanUp
221 214
222Not normally necessary, could be called at the end to release memory 215Release memory, file items and clean up files. Should be called after a
223before starting a new decoding round. 216decoding run, if you want to start a new one.
224 217
225=back 218=back
226 219
227=head2 Setting and querying options 220=head2 Setting and querying options
228 221
229=over 4 222=over
230 223
231=item $option = GetOption OPT_xxx 224=item $option = GetOption OPT_xxx
232 225
233=item SetOption OPT_xxx, opt-value 226=item SetOption OPT_xxx, opt-value
234 227
236 229
237See the C<OPT_xxx> constants above to see which options exist. 230See the C<OPT_xxx> constants above to see which options exist.
238 231
239=head2 Setting various callbacks 232=head2 Setting various callbacks
240 233
241=over 4 234=over
242 235
243=item SetMsgCallback [callback-function] 236=item SetMsgCallback [callback-function]
244 237
245=item SetBusyCallback [callback-function] 238=item SetBusyCallback [callback-function]
246 239
250 243
251=back 244=back
252 245
253=head2 Call the currently selected FNameFilter 246=head2 Call the currently selected FNameFilter
254 247
255=over 4 248=over
256 249
257=item $file = FNameFilter $file 250=item $file = FNameFilter $file
258 251
259=back 252=back
260 253
261=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 254=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
262 255
263=over 4 256=over
264 257
265=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] 258=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
266 259
267Load the given file and scan it for encoded contents. Optionally tag it 260Load the given file and scan it for encoded contents. Optionally tag it
268with the given id, and if C<$delflag> is true, delete the file after it 261with the given id, and if C<$delflag> is true, delete the file after it
277If you are desperate, try to call C<Smerge> with increasing C<$pass> 270If you are desperate, try to call C<Smerge> with increasing C<$pass>
278values, beginning at C<0>, to try to merge parts that usually would not 271values, beginning at C<0>, to try to merge parts that usually would not
279have been merged. 272have been merged.
280 273
281Most probably this will result in garbled files, so never do this by 274Most probably this will result in garbled files, so never do this by
282default. 275default, except:
276
277If the C<OPT_AUTOCHECK> option has been disabled (by default it is
278enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
279after loading all files as an additional pre-pass (which is normally done
280by C<LoadFile>).
283 281
284=item $item = GetFileListItem $item_number 282=item $item = GetFileListItem $item_number
285 283
286Return the C<$item> structure for the C<$item_number>'th found file, or 284Return the C<$item> structure for the C<$item_number>'th found file, or
287C<undef> if no file with that number exists. 285C<undef> if no file with that number exists.
288 286
289The first file has number C<0>, and the series has no holes, so you can 287The first file has number C<0>, and the series has no holes, so you can
290iterate over all files by starting with zero and incrementing until you 288iterate over all files by starting with zero and incrementing until you
291hit C<undef>. 289hit C<undef>.
292 290
291This function has to walk the linear list of files on each access, so
292if you want to iterate over all items, it is usually faster to use
293C<GetFileList>.
294
295=item @items = GetFileList
296
297Similar to C<GetFileListItem>, but returns all files in one go.
298
293=back 299=back
294 300
295=head2 Decoding files 301=head2 Decoding files
296 302
297=over 4 303=over
298 304
299=item $retval = $item->rename($newname) 305=item $retval = $item->rename ($newname)
300 306
301Change the ondisk filename where the decoded file will be saved. 307Change the ondisk filename where the decoded file will be saved.
302 308
303=item $retval = $item->decode_temp 309=item $retval = $item->decode_temp
304 310
307 313
308=item $retval = $item->remove_temp 314=item $retval = $item->remove_temp
309 315
310Remove the temporarily decoded file again. 316Remove the temporarily decoded file again.
311 317
312=item $retval = $item->decode([$target_path]) 318=item $retval = $item->decode ([$target_path])
313 319
314Decode the file to it's destination, or the given target path. 320Decode the file to its destination, or the given target path.
315 321
316=item $retval = $item->info(callback-function) 322=item $retval = $item->info (callback-function)
317 323
318=back 324=back
319 325
320=head2 Querying (and setting) item attributes 326=head2 Querying (and setting) item attributes
321 327
322=over 4 328=over
323 329
324=item $state = $item->state 330=item $state = $item->state
325 331
326=item $mode = $item->mode([newmode]) 332=item $mode = $item->mode ([newmode])
327 333
328=item $uudet = $item->uudet 334=item $uudet = $item->uudet
329 335
330=item $size = $item->size 336=item $size = $item->size
331 337
332=item $filename = $item->filename([newfilename]) 338=item $filename = $item->filename ([newfilename])
333 339
334=item $subfname = $item->subfname 340=item $subfname = $item->subfname
335 341
336=item $mimeid = $item->mimeid 342=item $mimeid = $item->mimeid
337 343
341 347
342=back 348=back
343 349
344=head2 Information about source parts 350=head2 Information about source parts
345 351
346=over 4 352=over
347 353
348=item $parts = $item->parts 354=item $parts = $item->parts
349 355
350Return information about all parts (source files) used to decode the file 356Return information about all parts (source files) used to decode the file
351as a list of hashrefs with the following structure: 357as a list of hashrefs with the following structure:
365Usually you are interested mostly in the C<sfname> and possibly the C<partno> 371Usually you are interested mostly in the C<sfname> and possibly the C<partno>
366and C<filename> members. 372and C<filename> members.
367 373
368=back 374=back
369 375
370=head2 Functions below not documented and not very well tested 376=head2 Functions below are not documented and not very well tested - feedback welcome
371 377
372 QuickDecode 378 QuickDecode
373 EncodeMulti 379 EncodeMulti
374 EncodePartial 380 EncodePartial
375 EncodeToStream 381 EncodeToStream
379 385
380=head2 EXTENSION FUNCTIONS 386=head2 EXTENSION FUNCTIONS
381 387
382Functions found in this module but not documented in the uulib documentation: 388Functions found in this module but not documented in the uulib documentation:
383 389
384=over 4 390=over
385 391
386=item $msg = straction ACT_xxx 392=item $msg = straction ACT_xxx
387 393
388Return a human readable string representing the given action code. 394Return a human readable string representing the given action code.
389 395
430 436
431=back 437=back
432 438
433=head1 LARGE EXAMPLE DECODER 439=head1 LARGE EXAMPLE DECODER
434 440
441The general workflow for decoding is like this:
442
443=over
444
445=item 1. Configure options with C<SetOption> or C<SetXXXCallback>.
446
447=item 2. Load all source files with C<LoadFile>.
448
449=item 3. Optionally C<Smerge>.
450
451=item 4. Iterate over all C<GetFileList> items (i.e. result files).
452
453=item 5. C<CleanUp> to delete files and free items.
454
455=back
456
435This is the file C<example-decoder> from the distribution, put here 457What follows is the file C<example-decoder> from the distribution that
436instead of more thorough documentation. 458illustrates the above workflow in a non-trivial example.
437 459
460 #!/usr/bin/perl
461
438 # decode all the files in the directory uusrc/ and copy 462 # decode all the files in the directory uusrc/ and copy
439 # the resulting files to uudst/ 463 # the resulting files to uudst/
440 464
441 use Convert::UUlib ':all'; 465 use Convert::UUlib ':all';
442 466
443 sub namefilter { 467 sub namefilter {
444 my($path)=@_; 468 my ($path) = @_;
469
445 $path=~s/^.*[\/\\]//; 470 $path=~s/^.*[\/\\]//;
471
446 $path; 472 $path
447 } 473 }
448 474
449 sub busycb { 475 sub busycb {
450 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 476 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
451 $_[0]=straction($action); 477 $_[0]=straction($action);
452 print "busy_callback(", (join ",",@_), ")\n"; 478 print "busy_callback(", (join ",",@_), ")\n";
453 0; 479 0
454 } 480 }
455 481
482 SetOption OPT_RBUF, 128*1024;
483 SetOption OPT_WBUF, 1024*1024;
456 SetOption OPT_IGNMODE, 1; 484 SetOption OPT_IGNMODE, 1;
485 SetOption OPT_IGNMODE, 1;
457 SetOption OPT_VERBOSE, 1; 486 SetOption OPT_VERBOSE, 1;
458 487
459 # show the three ways you can set callback functions. I normally 488 # show the three ways you can set callback functions. I normally
460 # prefer the one with the sub inplace. 489 # prefer the one with the sub inplace.
461 SetFNameFilter \&namefilter; 490 SetFNameFilter \&namefilter;
462 491
463 SetBusyCallback "busycb", 333; 492 SetBusyCallback "busycb", 333;
464 493
465 SetMsgCallback sub { 494 SetMsgCallback sub {
466 my ($msg, $level) = @_; 495 my ($msg, $level) = @_;
467 print uc strmsglevel $_[1], ": $msg\n"; 496 print uc strmsglevel $_[1], ": $msg\n";
468 }; 497 };
469 498
470 # the following non-trivial FileNameCallback takes care 499 # the following non-trivial FileNameCallback takes care
471 # of some subject lines not detected properly by uulib: 500 # of some subject lines not detected properly by uulib:
472 SetFileNameCallback sub { 501 SetFileNameCallback sub {
473 return unless $_[1]; # skip "Re:"-plies et al. 502 return unless $_[1]; # skip "Re:"-plies et al.
474 local $_ = $_[0]; 503 local $_ = $_[0];
475 504
476 # the following rules are rather effective on some newsgroups, 505 # the following rules are rather effective on some newsgroups,
477 # like alt.binaries.games.anime, where non-mime, uuencoded data 506 # like alt.binaries.games.anime, where non-mime, uuencoded data
478 # is very common 507 # is very common
479 508
480 # if we find some *.rar, take it as the filename 509 # if we find some *.rar, take it as the filename
481 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 510 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
482 511
483 # one common subject format 512 # one common subject format
484 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 513 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
485 514
486 # - filename.par (04/55) 515 # - filename.par (04/55)
487 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 516 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
488 517
489 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 518 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
490 # - (20 files) No.17 Roseanne.jpg [2/2] 519 # - (20 files) No.17 Roseanne.jpg [2/2]
491 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 520 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
492 521
522 # try to detect some common forms of filenames
523 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
524
493 # otherwise just pass what we have 525 # otherwise just pass what we have
494 return (); 526 ()
495 }; 527 };
496 528
497 # now read all files in the directory uusrc/* 529 # now read all files in the directory uusrc/*
498 for(<uusrc/*>) { 530 for (<uusrc/*>) {
499 my($retval,$count)=LoadFile ($_, $_, 1); 531 my ($retval, $count) = LoadFile ($_, $_, 1);
500 print "file($_), status(", strerror $retval, ") parts($count)\n"; 532 print "file($_), status(", strerror $retval, ") parts($count)\n";
501 } 533 }
502 534
503 SetOption OPT_SAVEPATH, "uudst/"; 535 SetOption OPT_SAVEPATH, "uudst/";
504 536
505 # now wade through all files and their source parts 537 # now wade through all files and their source parts
506 $i = 0; 538 for my $uu (GetFileList) {
507 while ($uu = GetFileListItem($i)) { 539 print "file ", $uu->filename, "\n";
508 $i++;
509 print "file nr. $i";
510 print " state ", $uu->state; 540 print " state ", $uu->state, "\n";
511 print " mode ", $uu->mode; 541 print " mode ", $uu->mode, "\n";
512 print " uudet ", strencoding $uu->uudet; 542 print " uudet ", strencoding $uu->uudet, "\n";
513 print " size ", $uu->size; 543 print " size ", $uu->size, "\n";
514 print " filename ", $uu->filename;
515 print " subfname ", $uu->subfname; 544 print " subfname ", $uu->subfname, "\n";
516 print " mimeid ", $uu->mimeid; 545 print " mimeid ", $uu->mimeid, "\n";
517 print " mimetype ", $uu->mimetype; 546 print " mimetype ", $uu->mimetype, "\n";
518 print "\n";
519 547
520 # print additional info about all parts 548 # print additional info about all parts
549 print " parts";
521 for ($uu->parts) { 550 for ($uu->parts) {
522 while (my ($k, $v) = each %$_) { 551 for my $k (sort keys %$_) {
523 print "$k > $v, "; 552 print " $k=$_->{$k}";
524 } 553 }
525 print "\n"; 554 print "\n";
526 } 555 }
527 556
528 $uu->decode_temp;
529 print " temporarily decoded to ", $uu->binfile, "\n";
530 $uu->remove_temp; 557 $uu->remove_temp;
531 558
532 print strerror $uu->decode; 559 if (my $err = $uu->decode) {
560 print " ERROR ", strerror $err, "\n";
561 } else {
533 print " saved as uudst/", $uu->filename, "\n"; 562 print " successfully saved as uudst/", $uu->filename, "\n";
534 } 563 }
564 }
535 565
536 print "cleanup...\n"; 566 print "cleanup...\n";
537 567
538 CleanUp(); 568 CleanUp;
569
570=head1 PERLMULTICORE SUPPORT
571
572This module supports the perlmulticore standard (see
573L<http://perlmulticore.schmorp.de/> for more info) for the following
574functions - generally these are functions accessing the disk and/or using
575considerable CPU time:
576
577 LoadFile
578 $item->decode
579 $item->decode_temp
580 $item->remove_temp
581 $item->info
582
583The perl interpreter will be reacquired/released on every callback
584invocation, so for performance reasons, callbacks should be avoided if
585that is costly.
586
587Future versions might enable multicore support for more functions.
588
589=head1 BUGS AND LIMITATIONS
590
591The original uulib library this module uses was written at a time where
592main memory was measured in megabytes and buffer overflows as a security
593thing didn't exist. While a lot of security fixes have been applied over
594the years (including some defense in depth mechanisms that can shield
595against a lot of as-of-yet undetected bugs), using this library for
596security purposes requires care.
597
598Likewise, file sizes when the uulib library was written were tiny compared
599to today, so do not expect this library to handle files larger than 2GB.
539 600
540=head1 AUTHOR 601=head1 AUTHOR
541 602
542Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written 603Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
543by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 604by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
544bugfixed by Marc Lehmann. 605bugfixed by Marc Lehmann.
545 606
546=head1 SEE ALSO 607=head1 SEE ALSO
547 608
548perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 609perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
549 610
550=cut 611=cut
612

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines