ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.30 by root, Fri Jun 13 13:27:51 2008 UTC vs.
Revision 1.52 by root, Mon Mar 16 23:54:29 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2 2
3no warnings; 3use common::sense;
4use strict;
5 4
6use Carp; 5use Carp;
7 6
8require Exporter; 7require Exporter;
9require DynaLoader; 8require DynaLoader;
10 9
11our $VERSION = '1.10'; 10our $VERSION = 1.71;
12 11
13our @ISA = qw(Exporter DynaLoader); 12our @ISA = qw(Exporter DynaLoader);
14 13
15our @_consts = qw( 14our @_consts = qw(
16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 15 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
22 21
23 OPT_RBUF OPT_WBUF 22 OPT_RBUF OPT_WBUF
24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 23 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 24 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 25 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 26 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
28 27
29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 28 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 29 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
31 30
32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 31 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
34); 33);
35 34
36our @_funcs = qw( 35our @_funcs = qw(
37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 36 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 37 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
39 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 38 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 39 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 40 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
42 E_PrepPartial 41 E_PrepPartial
43 42
44 straction strencoding strmsglevel 43 straction strencoding strmsglevel
48our @EXPORT_OK = @_funcs; 47our @EXPORT_OK = @_funcs;
49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 48our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
50 49
51bootstrap Convert::UUlib $VERSION; 50bootstrap Convert::UUlib $VERSION;
52 51
53Initialize(); 52# dummy function for compatibility with pre-1.7 versions
54 53sub Initialize { }
55# not when < 5.005_6x
56# END { CleanUp() }
57
58for (@_consts) {
59 my $constant = constant($_);
60 no strict 'refs';
61 *$_ = sub () { $constant };
62}
63 54
64# action code -> string mapping 55# action code -> string mapping
65sub straction($) { 56sub straction($) {
66 return 'copying' if $_[0] == &ACT_COPYING; 57 return 'copying' if $_[0] == &ACT_COPYING;
67 return 'decoding' if $_[0] == &ACT_DECODING; 58 return 'decoding' if $_[0] == &ACT_DECODING;
105 use Convert::UUlib ':all'; 96 use Convert::UUlib ':all';
106 97
107 # read all the files named on the commandline and decode them 98 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example. 99 # into the CURRENT directory. See below for a longer example.
109 LoadFile $_ for @ARGV; 100 LoadFile $_ for @ARGV;
110 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 101
102 for my $uu (GetFileList) {
111 if ($uu->state & FILE_OK) { 103 if ($uu->state & FILE_OK) {
112 $uu->decode; 104 $uu->decode;
113 print $uu->filename, "\n"; 105 print $uu->filename, "\n";
114 } 106 }
115 } 107 }
159 OPT_TINYB64 detect short B64 outside of Mime 151 OPT_TINYB64 detect short B64 outside of Mime
160 OPT_ENCEXT extension for single-part encoded files 152 OPT_ENCEXT extension for single-part encoded files
161 OPT_REMOVE remove input files after decoding (dangerous) 153 OPT_REMOVE remove input files after decoding (dangerous)
162 OPT_MOREMIME strict MIME adherence 154 OPT_MOREMIME strict MIME adherence
163 OPT_DOTDOT ".."-unescaping has not yet been done on input files 155 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL* 156 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL* 157 OPT_WBUF set default write I/O buffer size in bytes
158 OPT_AUTOCHECK automatically check file list after every loadfile
166 159
167=head2 Result/Error codes 160=head2 Result/Error codes
168 161
169 RET_OK everything went fine 162 RET_OK everything went fine
170 RET_IOERR I/O Error - examine errno 163 RET_IOERR I/O Error - examine errno
213again. 206again.
214 207
215On my machine, a fairly complete decode with DBI backend needs about 10MB 208On my machine, a fairly complete decode with DBI backend needs about 10MB
216RSS to decode 20000 files. 209RSS to decode 20000 files.
217 210
218=over 4 211=over
219
220=item Initialize
221
222Not normally necessary, (re-)initializes the library.
223 212
224=item CleanUp 213=item CleanUp
225 214
226Not normally necessary, could be called at the end to release memory 215Release memory, file items and clean up files. Should be called after a
227before starting a new decoding round. 216decoding run, if you want to start a new one.
228 217
229=back 218=back
230 219
231=head2 Setting and querying options 220=head2 Setting and querying options
232 221
233=over 4 222=over
234 223
235=item $option = GetOption OPT_xxx 224=item $option = GetOption OPT_xxx
236 225
237=item SetOption OPT_xxx, opt-value 226=item SetOption OPT_xxx, opt-value
238 227
240 229
241See the C<OPT_xxx> constants above to see which options exist. 230See the C<OPT_xxx> constants above to see which options exist.
242 231
243=head2 Setting various callbacks 232=head2 Setting various callbacks
244 233
245=over 4 234=over
246 235
247=item SetMsgCallback [callback-function] 236=item SetMsgCallback [callback-function]
248 237
249=item SetBusyCallback [callback-function] 238=item SetBusyCallback [callback-function]
250 239
254 243
255=back 244=back
256 245
257=head2 Call the currently selected FNameFilter 246=head2 Call the currently selected FNameFilter
258 247
259=over 4 248=over
260 249
261=item $file = FNameFilter $file 250=item $file = FNameFilter $file
262 251
263=back 252=back
264 253
265=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 254=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
266 255
267=over 4 256=over
268 257
269=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] 258=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
270 259
271Load the given file and scan it for encoded contents. Optionally tag it 260Load the given file and scan it for encoded contents. Optionally tag it
272with the given id, and if C<$delflag> is true, delete the file after it 261with the given id, and if C<$delflag> is true, delete the file after it
281If you are desperate, try to call C<Smerge> with increasing C<$pass> 270If you are desperate, try to call C<Smerge> with increasing C<$pass>
282values, beginning at C<0>, to try to merge parts that usually would not 271values, beginning at C<0>, to try to merge parts that usually would not
283have been merged. 272have been merged.
284 273
285Most probably this will result in garbled files, so never do this by 274Most probably this will result in garbled files, so never do this by
286default. 275default, except:
276
277If the C<OPT_AUTOCHECK> option has been disabled (by default it is
278enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
279after loading all files as an additional pre-pass (which is normally done
280by C<LoadFile>).
287 281
288=item $item = GetFileListItem $item_number 282=item $item = GetFileListItem $item_number
289 283
290Return the C<$item> structure for the C<$item_number>'th found file, or 284Return the C<$item> structure for the C<$item_number>'th found file, or
291C<undef> if no file with that number exists. 285C<undef> if no file with that number exists.
292 286
293The first file has number C<0>, and the series has no holes, so you can 287The first file has number C<0>, and the series has no holes, so you can
294iterate over all files by starting with zero and incrementing until you 288iterate over all files by starting with zero and incrementing until you
295hit C<undef>. 289hit C<undef>.
296 290
291This function has to walk the linear list of files on each access, so
292if you want to iterate over all items, it is usually faster to use
293C<GetFileList>.
294
295=item @items = GetFileList
296
297Similar to C<GetFileListItem>, but returns all files in one go.
298
297=back 299=back
298 300
299=head2 Decoding files 301=head2 Decoding files
300 302
301=over 4 303=over
302 304
303=item $retval = $item->rename($newname) 305=item $retval = $item->rename ($newname)
304 306
305Change the ondisk filename where the decoded file will be saved. 307Change the ondisk filename where the decoded file will be saved.
306 308
307=item $retval = $item->decode_temp 309=item $retval = $item->decode_temp
308 310
311 313
312=item $retval = $item->remove_temp 314=item $retval = $item->remove_temp
313 315
314Remove the temporarily decoded file again. 316Remove the temporarily decoded file again.
315 317
316=item $retval = $item->decode([$target_path]) 318=item $retval = $item->decode ([$target_path])
317 319
318Decode the file to it's destination, or the given target path. 320Decode the file to its destination, or the given target path.
319 321
320=item $retval = $item->info(callback-function) 322=item $retval = $item->info (callback-function)
321 323
322=back 324=back
323 325
324=head2 Querying (and setting) item attributes 326=head2 Querying (and setting) item attributes
325 327
326=over 4 328=over
327 329
328=item $state = $item->state 330=item $state = $item->state
329 331
330=item $mode = $item->mode([newmode]) 332=item $mode = $item->mode ([newmode])
331 333
332=item $uudet = $item->uudet 334=item $uudet = $item->uudet
333 335
334=item $size = $item->size 336=item $size = $item->size
335 337
336=item $filename = $item->filename([newfilename]) 338=item $filename = $item->filename ([newfilename])
337 339
338=item $subfname = $item->subfname 340=item $subfname = $item->subfname
339 341
340=item $mimeid = $item->mimeid 342=item $mimeid = $item->mimeid
341 343
345 347
346=back 348=back
347 349
348=head2 Information about source parts 350=head2 Information about source parts
349 351
350=over 4 352=over
351 353
352=item $parts = $item->parts 354=item $parts = $item->parts
353 355
354Return information about all parts (source files) used to decode the file 356Return information about all parts (source files) used to decode the file
355as a list of hashrefs with the following structure: 357as a list of hashrefs with the following structure:
369Usually you are interested mostly in the C<sfname> and possibly the C<partno> 371Usually you are interested mostly in the C<sfname> and possibly the C<partno>
370and C<filename> members. 372and C<filename> members.
371 373
372=back 374=back
373 375
374=head2 Functions below not documented and not very well tested 376=head2 Functions below are not documented and not very well tested - feedback welcome
375 377
376 QuickDecode 378 QuickDecode
377 EncodeMulti 379 EncodeMulti
378 EncodePartial 380 EncodePartial
379 EncodeToStream 381 EncodeToStream
383 385
384=head2 EXTENSION FUNCTIONS 386=head2 EXTENSION FUNCTIONS
385 387
386Functions found in this module but not documented in the uulib documentation: 388Functions found in this module but not documented in the uulib documentation:
387 389
388=over 4 390=over
389 391
390=item $msg = straction ACT_xxx 392=item $msg = straction ACT_xxx
391 393
392Return a human readable string representing the given action code. 394Return a human readable string representing the given action code.
393 395
434 436
435=back 437=back
436 438
437=head1 LARGE EXAMPLE DECODER 439=head1 LARGE EXAMPLE DECODER
438 440
441The general workflow for decoding is like this:
442
443=over
444
445=item 1. Configure options with C<SetOption> or C<SetXXXCallback>.
446
447=item 2. Load all source files with C<LoadFile>.
448
449=item 3. Optionally C<Smerge>.
450
451=item 4. Iterate over all C<GetFileList> items (i.e. result files).
452
453=item 5. C<CleanUp> to delete files and free items.
454
455=back
456
439This is the file C<example-decoder> from the distribution, put here 457What follows is the file C<example-decoder> from the distribution that
440instead of more thorough documentation. 458illustrates the above workflow in a non-trivial example.
441 459
460 #!/usr/bin/perl
461
442 # decode all the files in the directory uusrc/ and copy 462 # decode all the files in the directory uusrc/ and copy
443 # the resulting files to uudst/ 463 # the resulting files to uudst/
444 464
445 use Convert::UUlib ':all'; 465 use Convert::UUlib ':all';
446 466
447 sub namefilter { 467 sub namefilter {
448 my($path)=@_; 468 my ($path) = @_;
469
449 $path=~s/^.*[\/\\]//; 470 $path=~s/^.*[\/\\]//;
471
450 $path; 472 $path
451 } 473 }
452 474
453 sub busycb { 475 sub busycb {
454 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 476 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
455 $_[0]=straction($action); 477 $_[0]=straction($action);
456 print "busy_callback(", (join ",",@_), ")\n"; 478 print "busy_callback(", (join ",",@_), ")\n";
457 0; 479 0
458 } 480 }
459 481
482 SetOption OPT_RBUF, 128*1024;
483 SetOption OPT_WBUF, 1024*1024;
460 SetOption OPT_IGNMODE, 1; 484 SetOption OPT_IGNMODE, 1;
485 SetOption OPT_IGNMODE, 1;
461 SetOption OPT_VERBOSE, 1; 486 SetOption OPT_VERBOSE, 1;
462 487
463 # show the three ways you can set callback functions. I normally 488 # show the three ways you can set callback functions. I normally
464 # prefer the one with the sub inplace. 489 # prefer the one with the sub inplace.
465 SetFNameFilter \&namefilter; 490 SetFNameFilter \&namefilter;
466 491
467 SetBusyCallback "busycb", 333; 492 SetBusyCallback "busycb", 333;
468 493
469 SetMsgCallback sub { 494 SetMsgCallback sub {
470 my ($msg, $level) = @_; 495 my ($msg, $level) = @_;
471 print uc strmsglevel $_[1], ": $msg\n"; 496 print uc strmsglevel $_[1], ": $msg\n";
472 }; 497 };
473 498
474 # the following non-trivial FileNameCallback takes care 499 # the following non-trivial FileNameCallback takes care
475 # of some subject lines not detected properly by uulib: 500 # of some subject lines not detected properly by uulib:
476 SetFileNameCallback sub { 501 SetFileNameCallback sub {
477 return unless $_[1]; # skip "Re:"-plies et al. 502 return unless $_[1]; # skip "Re:"-plies et al.
478 local $_ = $_[0]; 503 local $_ = $_[0];
479 504
480 # the following rules are rather effective on some newsgroups, 505 # the following rules are rather effective on some newsgroups,
481 # like alt.binaries.games.anime, where non-mime, uuencoded data 506 # like alt.binaries.games.anime, where non-mime, uuencoded data
482 # is very common 507 # is very common
483 508
484 # if we find some *.rar, take it as the filename 509 # if we find some *.rar, take it as the filename
485 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 510 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
486 511
487 # one common subject format 512 # one common subject format
488 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 513 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
489 514
490 # - filename.par (04/55) 515 # - filename.par (04/55)
491 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 516 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
492 517
493 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 518 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
494 # - (20 files) No.17 Roseanne.jpg [2/2] 519 # - (20 files) No.17 Roseanne.jpg [2/2]
495 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 520 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
496 521
522 # try to detect some common forms of filenames
523 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
524
497 # otherwise just pass what we have 525 # otherwise just pass what we have
498 return (); 526 ()
499 }; 527 };
500 528
501 # now read all files in the directory uusrc/* 529 # now read all files in the directory uusrc/*
502 for(<uusrc/*>) { 530 for (<uusrc/*>) {
503 my($retval,$count)=LoadFile ($_, $_, 1); 531 my ($retval, $count) = LoadFile ($_, $_, 1);
504 print "file($_), status(", strerror $retval, ") parts($count)\n"; 532 print "file($_), status(", strerror $retval, ") parts($count)\n";
505 } 533 }
506 534
507 SetOption OPT_SAVEPATH, "uudst/"; 535 SetOption OPT_SAVEPATH, "uudst/";
508 536
509 # now wade through all files and their source parts 537 # now wade through all files and their source parts
510 $i = 0; 538 for my $uu (GetFileList) {
511 while ($uu = GetFileListItem($i)) { 539 print "file ", $uu->filename, "\n";
512 $i++;
513 print "file nr. $i";
514 print " state ", $uu->state; 540 print " state ", $uu->state, "\n";
515 print " mode ", $uu->mode; 541 print " mode ", $uu->mode, "\n";
516 print " uudet ", strencoding $uu->uudet; 542 print " uudet ", strencoding $uu->uudet, "\n";
517 print " size ", $uu->size; 543 print " size ", $uu->size, "\n";
518 print " filename ", $uu->filename;
519 print " subfname ", $uu->subfname; 544 print " subfname ", $uu->subfname, "\n";
520 print " mimeid ", $uu->mimeid; 545 print " mimeid ", $uu->mimeid, "\n";
521 print " mimetype ", $uu->mimetype; 546 print " mimetype ", $uu->mimetype, "\n";
522 print "\n";
523 547
524 # print additional info about all parts 548 # print additional info about all parts
549 print " parts";
525 for ($uu->parts) { 550 for ($uu->parts) {
526 while (my ($k, $v) = each %$_) { 551 for my $k (sort keys %$_) {
527 print "$k > $v, "; 552 print " $k=$_->{$k}";
528 } 553 }
529 print "\n"; 554 print "\n";
530 } 555 }
531 556
532 $uu->decode_temp;
533 print " temporarily decoded to ", $uu->binfile, "\n";
534 $uu->remove_temp; 557 $uu->remove_temp;
535 558
536 print strerror $uu->decode; 559 if (my $err = $uu->decode) {
560 print " ERROR ", strerror $err, "\n";
561 } else {
537 print " saved as uudst/", $uu->filename, "\n"; 562 print " successfully saved as uudst/", $uu->filename, "\n";
538 } 563 }
564 }
539 565
540 print "cleanup...\n"; 566 print "cleanup...\n";
541 567
542 CleanUp(); 568 CleanUp;
569
570=head1 PERLMULTICORE SUPPORT
571
572This module supports the perlmulticore standard (see
573L<http://perlmulticore.schmorp.de/> for more info) for the following
574functions - generally these are functions accessing the disk and/or using
575considerable CPU time:
576
577 LoadFile
578 $item->decode
579 $item->decode_temp
580 $item->remove_temp
581 $item->info
582
583The perl interpreter will be reacquired/released on every callback
584invocation, so for performance reasons, callbacks should be avoided if
585that is costly.
586
587Future versions might enable multicore support for more functions.
588
589=head1 BUGS AND LIMITATIONS
590
591The original uulib library this module uses was written at a time when
592main memory was measured in megabytes and buffer overflows as a security
593thing didn't exist. While a lot of security fixes have been applied over
594the years (including some defense in depth mechanisms that can shield
595against a lot of as-of-yet undetected bugs), using this library for
596security purposes requires care.
597
598Likewise, file sizes when the uulib library was written were tiny compared
599to today, so do not expect this library to handle files larger than 2GB.
600
601Lastly, this module uses a very "C-like" interface, which means it doesn't
602protect you from invalid pointers as you might expect from "more perlish"
603modules - for example, accessing a file item object after calling
604C<CleanUp> will likely result in crashes, memory corruption, or worse.
543 605
544=head1 AUTHOR 606=head1 AUTHOR
545 607
546Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written 608Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
547by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 609by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
548bugfixed by Marc Lehmann. 610bugfixed by Marc Lehmann.
549 611
550=head1 SEE ALSO 612=head1 SEE ALSO
551 613
552perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 614perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
553 615
554=cut 616=cut
617

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines