ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.15 by root, Tue Oct 15 23:35:31 2002 UTC vs.
Revision 1.48 by root, Fri Feb 28 17:19:02 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3no warnings;
4use strict;
2 5
3use Carp; 6use Carp;
4 7
5require Exporter; 8require Exporter;
6require DynaLoader; 9require DynaLoader;
7 10
8$VERSION = 0.31; 11our $VERSION = 1.62;
9 12
10@ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
11 14
12@_consts = qw( 15our @_consts = qw(
13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14 17
15 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 18 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 19 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17 20
18 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 21 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
19 22
23 OPT_RBUF OPT_WBUF
20 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
21 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
22 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
23 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
24 28
25 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
26 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
27 31
28 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
29 XX_ENCODED UU_ENCODED YENC_ENCODED 33 XX_ENCODED UU_ENCODED YENC_ENCODED
30); 34);
31 35
32@_funcs = qw( 36our @_funcs = qw(
33 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
34 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
35 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
36 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
37 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
38 E_PrepPartial 42 E_PrepPartial
39 43
40 straction strencoding strmsglevel 44 straction strencoding strmsglevel
41); 45);
42 46
43@EXPORT = @_consts; 47our @EXPORT = @_consts;
44@EXPORT_OK = @_funcs; 48our @EXPORT_OK = @_funcs;
45%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
46 50
47bootstrap Convert::UUlib $VERSION; 51bootstrap Convert::UUlib $VERSION;
48 52
49Initialize(); 53# dummy function for compatibility with pre-1.7 versions
50 54sub Initialize { }
51# not when < 5.005_6x
52# END { CleanUp() }
53 55
54for (@_consts) { 56for (@_consts) {
55 my $constant = constant($_); 57 my $constant = constant ($_);
58 no strict 'refs';
56 *$_ = sub () { $constant }; 59 *$_ = sub () { $constant };
57} 60}
58 61
59# action code -> string mapping 62# action code -> string mapping
60sub straction($) { 63sub straction($) {
100 use Convert::UUlib ':all'; 103 use Convert::UUlib ':all';
101 104
102 # read all the files named on the commandline and decode them 105 # read all the files named on the commandline and decode them
103 # into the CURRENT directory. See below for a longer example. 106 # into the CURRENT directory. See below for a longer example.
104 LoadFile $_ for @ARGV; 107 LoadFile $_ for @ARGV;
105 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 108
109 for my $uu (GetFileList) {
106 if ($uu->state & FILE_OK) { 110 if ($uu->state & FILE_OK) {
107 $uu->decode; 111 $uu->decode;
108 print $uu->filename, "\n"; 112 print $uu->filename, "\n";
109 } 113 }
110 } 114 }
154 OPT_TINYB64 detect short B64 outside of Mime 158 OPT_TINYB64 detect short B64 outside of Mime
155 OPT_ENCEXT extension for single-part encoded files 159 OPT_ENCEXT extension for single-part encoded files
156 OPT_REMOVE remove input files after decoding (dangerous) 160 OPT_REMOVE remove input files after decoding (dangerous)
157 OPT_MOREMIME strict MIME adherence 161 OPT_MOREMIME strict MIME adherence
158 OPT_DOTDOT ".."-unescaping has not yet been done on input files 162 OPT_DOTDOT ".."-unescaping has not yet been done on input files
163 OPT_RBUF set default read I/O buffer size in bytes
164 OPT_WBUF set default write I/O buffer size in bytes
165 OPT_AUTOCHECK automatically check file list after every loadfile
159 166
160=head2 Result/Error codes 167=head2 Result/Error codes
161 168
162 RET_OK everything went fine 169 RET_OK everything went fine
163 RET_IOERR I/O Error - examine errno 170 RET_IOERR I/O Error - examine errno
206again. 213again.
207 214
208On my machine, a fairly complete decode with DBI backend needs about 10MB 215On my machine, a fairly complete decode with DBI backend needs about 10MB
209RSS to decode 20000 files. 216RSS to decode 20000 files.
210 217
211=over 4 218=over
212
213=item Initialize
214
215Not normally necessary, (re-)initializes the library.
216 219
217=item CleanUp 220=item CleanUp
218 221
219Not normally necessary, could be called at the end to release memory 222Release memory, file items and clean up files. Should be called after a
220before starting a new decoding round. 223decoding run, if you want to start a new one.
221 224
222=back 225=back
223 226
224=head2 Setting and querying options 227=head2 Setting and querying options
225 228
226=over 4 229=over
227 230
228=item $option = GetOption OPT_xxx 231=item $option = GetOption OPT_xxx
229 232
230=item SetOption OPT_xxx, opt-value 233=item SetOption OPT_xxx, opt-value
231 234
233 236
234See the C<OPT_xxx> constants above to see which options exist. 237See the C<OPT_xxx> constants above to see which options exist.
235 238
236=head2 Setting various callbacks 239=head2 Setting various callbacks
237 240
238=over 4 241=over
239 242
240=item SetMsgCallback [callback-function] 243=item SetMsgCallback [callback-function]
241 244
242=item SetBusyCallback [callback-function] 245=item SetBusyCallback [callback-function]
243 246
247 250
248=back 251=back
249 252
250=head2 Call the currently selected FNameFilter 253=head2 Call the currently selected FNameFilter
251 254
252=over 4 255=over
253 256
254=item $file = FNameFilter $file 257=item $file = FNameFilter $file
255 258
256=back 259=back
257 260
258=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 261=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
259 262
260=over 4 263=over
261 264
262=item ($retval, $count) = LoadFile $fname, [$id, [$delflag]] 265=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
263 266
264Load the given file and scan it for encoded contents. Optionally tag it 267Load the given file and scan it for encoded contents. Optionally tag it
265with the given id, and if C<$delflag> is true, delete the file after it is 268with the given id, and if C<$delflag> is true, delete the file after it
266no longer necessary. 269is no longer necessary. If you are certain of the part number, you can
270specify it as the last argument.
271
272A better (usually faster) way of doing this is using the C<SetFNameFilter>
273functionality.
267 274
268=item $retval = Smerge $pass 275=item $retval = Smerge $pass
269 276
270If you are desperate, try to call C<Smerge> with increasing C<$pass> 277If you are desperate, try to call C<Smerge> with increasing C<$pass>
271values, beginning at C<0>, to try to merge parts that usually would not 278values, beginning at C<0>, to try to merge parts that usually would not
272have been merged. 279have been merged.
273 280
274Most probably this will result in garbled files, so never do this by 281Most probably this will result in garbled files, so never do this by
275default. 282default, except:
283
284If the C<OPT_AUTOCHECK> option has been disabled (by default it is
285enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
286after loading all files as an additional pre-pass (which is normally done
287by C<LoadFile>).
276 288
277=item $item = GetFileListItem $item_number 289=item $item = GetFileListItem $item_number
278 290
279Return the C<$item> structure for the C<$item_number>'th found file, or 291Return the C<$item> structure for the C<$item_number>'th found file, or
280C<undef> if no file with that number exists. 292C<undef> if no file with that number exists.
281 293
282The first file has number C<0>, and the series has no holes, so you can 294The first file has number C<0>, and the series has no holes, so you can
283iterate over all files by starting with zero and incrementing until you 295iterate over all files by starting with zero and incrementing until you
284hit C<undef>. 296hit C<undef>.
285 297
298This function has to walk the linear list of files on each access, so
299if you want to iterate over all items, it is usually faster to use
300C<GetFileList>.
301
302=item @items = GetFileList
303
304Similar to C<GetFileListItem>, but returns all files in one go.
305
286=back 306=back
287 307
288=head2 Decoding files 308=head2 Decoding files
289 309
290=over 4 310=over
291 311
292=item $retval = $item->rename($newname) 312=item $retval = $item->rename ($newname)
293 313
294Change the ondisk filename where the decoded file will be saved. 314Change the ondisk filename where the decoded file will be saved.
295 315
296=item $retval = $item->decode_temp 316=item $retval = $item->decode_temp
297 317
300 320
301=item $retval = $item->remove_temp 321=item $retval = $item->remove_temp
302 322
303Remove the temporarily decoded file again. 323Remove the temporarily decoded file again.
304 324
305=item $retval = $item->decode([$target_path]) 325=item $retval = $item->decode ([$target_path])
306 326
307Decode the file to it's destination, or the given target path. 327Decode the file to its destination, or the given target path.
308 328
309=item $retval = $item->info(callback-function) 329=item $retval = $item->info (callback-function)
310 330
311=back 331=back
312 332
313=head2 Querying (and setting) item attributes 333=head2 Querying (and setting) item attributes
314 334
315=over 4 335=over
316 336
317=item $state = $item->state 337=item $state = $item->state
318 338
319=item $mode = $item->mode([newmode]) 339=item $mode = $item->mode ([newmode])
320 340
321=item $uudet = $item->uudet 341=item $uudet = $item->uudet
322 342
323=item $size = $item->size 343=item $size = $item->size
324 344
325=item $filename = $item->filename([newfilename]) 345=item $filename = $item->filename ([newfilename])
326 346
327=item $subfname = $item->subfname 347=item $subfname = $item->subfname
328 348
329=item $mimeid = $item->mimeid 349=item $mimeid = $item->mimeid
330 350
334 354
335=back 355=back
336 356
337=head2 Information about source parts 357=head2 Information about source parts
338 358
339=over 4 359=over
340 360
341=item $parts = $item->parts 361=item $parts = $item->parts
342 362
343Return information about all parts (source files) used to decode the file 363Return information about all parts (source files) used to decode the file
344as a list of hashrefs with the following structure: 364as a list of hashrefs with the following structure:
358Usually you are interested mostly in the C<sfname> and possibly the C<partno> 378Usually you are interested mostly in the C<sfname> and possibly the C<partno>
359and C<filename> members. 379and C<filename> members.
360 380
361=back 381=back
362 382
363=head2 Functions below not documented and not very well tested 383=head2 Functions below are not documented and not very well tested - feedback welcome
364 384
365 QuickDecode 385 QuickDecode
366 EncodeMulti 386 EncodeMulti
367 EncodePartial 387 EncodePartial
368 EncodeToStream 388 EncodeToStream
372 392
373=head2 EXTENSION FUNCTIONS 393=head2 EXTENSION FUNCTIONS
374 394
375Functions found in this module but not documented in the uulib documentation: 395Functions found in this module but not documented in the uulib documentation:
376 396
377=over 4 397=over
378 398
379=item $msg = straction ACT_xxx 399=item $msg = straction ACT_xxx
380 400
381Return a human readable string representing the given action code. 401Return a human readable string representing the given action code.
382 402
423 443
424=back 444=back
425 445
426=head1 LARGE EXAMPLE DECODER 446=head1 LARGE EXAMPLE DECODER
427 447
448The general workflow for decoding is like this:
449
450=over
451
452=item 1. Configure options with C<SetOption> or C<SetXXXCallback>.
453
454=item 2. Load all source files with C<LoadFile>.
455
456=item 3. Optionally C<Smerge>.
457
458=item 4. Iterate over all C<GetFileList> items (i.e. result files).
459
460=item 5. C<CleanUp> to delete files and free items.
461
462=back
463
428This is the file C<example-decoder> from the distribution, put here 464What follows is the file C<example-decoder> from the distribution that
429instead of more thorough documentation. 465illustrates the above workflow in a non-trivial example.
430 466
467 #!/usr/bin/perl
468
431 # decode all the files in the directory uusrc/ and copy 469 # decode all the files in the directory uusrc/ and copy
432 # the resulting files to uudst/ 470 # the resulting files to uudst/
433 471
434 use Convert::UUlib ':all'; 472 use Convert::UUlib ':all';
435 473
436 sub namefilter { 474 sub namefilter {
437 my($path)=@_; 475 my ($path) = @_;
476
438 $path=~s/^.*[\/\\]//; 477 $path=~s/^.*[\/\\]//;
478
439 $path; 479 $path
440 } 480 }
441 481
442 sub busycb { 482 sub busycb {
443 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 483 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
444 $_[0]=straction($action); 484 $_[0]=straction($action);
445 print "busy_callback(", (join ",",@_), ")\n"; 485 print "busy_callback(", (join ",",@_), ")\n";
446 0; 486 0
447 } 487 }
448 488
489 SetOption OPT_RBUF, 128*1024;
490 SetOption OPT_WBUF, 1024*1024;
449 SetOption OPT_IGNMODE, 1; 491 SetOption OPT_IGNMODE, 1;
492 SetOption OPT_IGNMODE, 1;
450 SetOption OPT_VERBOSE, 1; 493 SetOption OPT_VERBOSE, 1;
451 494
452 # show the three ways you can set callback functions. I normally 495 # show the three ways you can set callback functions. I normally
453 # prefer the one with the sub inplace. 496 # prefer the one with the sub inplace.
454 SetFNameFilter \&namefilter; 497 SetFNameFilter \&namefilter;
455 498
456 SetBusyCallback "busycb", 333; 499 SetBusyCallback "busycb", 333;
457 500
458 SetMsgCallback sub { 501 SetMsgCallback sub {
459 my ($msg, $level) = @_; 502 my ($msg, $level) = @_;
460 print uc strmsglevel $_[1], ": $msg\n"; 503 print uc strmsglevel $_[1], ": $msg\n";
461 }; 504 };
462 505
463 # the following non-trivial FileNameCallback takes care 506 # the following non-trivial FileNameCallback takes care
464 # of some subject lines not detected properly by uulib: 507 # of some subject lines not detected properly by uulib:
465 SetFileNameCallback sub { 508 SetFileNameCallback sub {
466 return unless $_[1]; # skip "Re:"-plies et al. 509 return unless $_[1]; # skip "Re:"-plies et al.
467 local $_ = $_[0]; 510 local $_ = $_[0];
468 511
469 # the following rules are rather effective on some newsgroups, 512 # the following rules are rather effective on some newsgroups,
470 # like alt.binaries.games.anime, where non-mime, uuencoded data 513 # like alt.binaries.games.anime, where non-mime, uuencoded data
471 # is very common 514 # is very common
472 515
473 # if we find some *.rar, take it as the filename 516 # if we find some *.rar, take it as the filename
474 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 517 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
475 518
476 # one common subject format 519 # one common subject format
477 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 520 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
478 521
479 # - filename.par (04/55) 522 # - filename.par (04/55)
480 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 523 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
481 524
482 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 525 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
483 # - (20 files) No.17 Roseanne.jpg [2/2] 526 # - (20 files) No.17 Roseanne.jpg [2/2]
484 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 527 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
485 528
529 # try to detect some common forms of filenames
530 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
531
486 # otherwise just pass what we have 532 # otherwise just pass what we have
487 return (); 533 ()
488 }; 534 };
489 535
490 # now read all files in the directory uusrc/* 536 # now read all files in the directory uusrc/*
491 for(<uusrc/*>) { 537 for (<uusrc/*>) {
492 my($retval,$count)=LoadFile ($_, $_, 1); 538 my ($retval, $count) = LoadFile ($_, $_, 1);
493 print "file($_), status(", strerror $retval, ") parts($count)\n"; 539 print "file($_), status(", strerror $retval, ") parts($count)\n";
494 } 540 }
495 541
496 SetOption OPT_SAVEPATH, "uudst/"; 542 SetOption OPT_SAVEPATH, "uudst/";
497 543
498 # now wade through all files and their source parts 544 # now wade through all files and their source parts
499 $i = 0; 545 for my $uu (GetFileList) {
500 while ($uu = GetFileListItem($i)) { 546 print "file ", $uu->filename, "\n";
501 $i++;
502 print "file nr. $i";
503 print " state ", $uu->state; 547 print " state ", $uu->state, "\n";
504 print " mode ", $uu->mode; 548 print " mode ", $uu->mode, "\n";
505 print " uudet ", strencoding $uu->uudet; 549 print " uudet ", strencoding $uu->uudet, "\n";
506 print " size ", $uu->size; 550 print " size ", $uu->size, "\n";
507 print " filename ", $uu->filename;
508 print " subfname ", $uu->subfname; 551 print " subfname ", $uu->subfname, "\n";
509 print " mimeid ", $uu->mimeid; 552 print " mimeid ", $uu->mimeid, "\n";
510 print " mimetype ", $uu->mimetype; 553 print " mimetype ", $uu->mimetype, "\n";
511 print "\n";
512 554
513 # print additional info about all parts 555 # print additional info about all parts
556 print " parts";
514 for ($uu->parts) { 557 for ($uu->parts) {
515 while (my ($k, $v) = each %$_) { 558 for my $k (sort keys %$_) {
516 print "$k > $v, "; 559 print " $k=$_->{$k}";
517 } 560 }
518 print "\n"; 561 print "\n";
519 } 562 }
520 563
521 $uu->decode_temp;
522 print " temporarily decoded to ", $uu->binfile, "\n";
523 $uu->remove_temp; 564 $uu->remove_temp;
524 565
525 print strerror $uu->decode; 566 if (my $err = $uu->decode) {
567 print " ERROR ", strerror $err, "\n";
568 } else {
526 print " saved as uudst/", $uu->filename, "\n"; 569 print " successfully saved as uudst/", $uu->filename, "\n";
527 } 570 }
571 }
528 572
529 print "cleanup...\n"; 573 print "cleanup...\n";
530 574
531 CleanUp(); 575 CleanUp;
576
577=head1 PERLMULTICORE SUPPORT
578
579This module supports the perlmulticore standard (see
580L<http://perlmulticore.schmorp.de/> for more info) for the following
581functions - generally these are functions accessing the disk and/or using
582considerable CPU time:
583
584 LoadFile
585 $item->decode
586 $item->decode_temp
587 $item->remove_temp
588 $item->info
589
590The perl interpreter will be reacquired/released on every callback
591invocation, so for performance reasons, callbacks should be avoided if
592that is costly.
593
594Future versions might enable multicore support for more functions.
595
596=head1 BUGS AND LIMITATIONS
597
598The original uulib library this module uses was written at a time when
599main memory was measured in megabytes and buffer overflows as a security
600thing didn't exist. While a lot of security fixes have been applied over
601the years (including some defense in depth mechanisms that can shield
602against a lot of as-of-yet undetected bugs), using this library for
603security purposes requires care.
604
605Likewise, file sizes when the uulib library was written were tiny compared
606to today, so do not expect this library to handle files larger than 2GB.
532 607
533=head1 AUTHOR 608=head1 AUTHOR
534 609
535Marc Lehmann <pcg@goof.com>, the original uulib library was written 610Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
536by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 611by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
537bugfixed by Marc Lehmann. 612bugfixed by Marc Lehmann.
538 613
539=head1 SEE ALSO 614=head1 SEE ALSO
540 615
541perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 616perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
542 617
543=cut 618=cut
619

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines