ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.15 by root, Tue Oct 15 23:35:31 2002 UTC vs.
Revision 1.54 by root, Sat Dec 12 10:48:39 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3use common::sense;
2 4
3use Carp; 5use Carp;
4 6
5require Exporter; 7require Exporter;
6require DynaLoader; 8require DynaLoader;
7 9
8$VERSION = 0.31; 10our $VERSION = 1.71;
9 11
10@ISA = qw(Exporter DynaLoader); 12our @ISA = qw(Exporter DynaLoader);
11 13
12@_consts = qw( 14our @_consts = qw(
13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 15 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14 16
15 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 17 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 18 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17 19
18 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 20 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
19 21
22 OPT_RBUF OPT_WBUF
20 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 23 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
21 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 24 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
22 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 25 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
23 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 26 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
24 27
25 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 28 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
26 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 29 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
27 30
28 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 31 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
29 XX_ENCODED UU_ENCODED YENC_ENCODED 32 XX_ENCODED UU_ENCODED YENC_ENCODED
30); 33);
31 34
32@_funcs = qw( 35our @_funcs = qw(
33 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 36 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
34 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 37 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
35 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 38 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
36 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 39 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
37 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 40 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
38 E_PrepPartial 41 E_PrepPartial
39 42
40 straction strencoding strmsglevel 43 straction strencoding strmsglevel
41); 44);
42 45
43@EXPORT = @_consts; 46our @EXPORT = @_consts;
44@EXPORT_OK = @_funcs; 47our @EXPORT_OK = @_funcs;
45%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 48our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
46 49
47bootstrap Convert::UUlib $VERSION; 50bootstrap Convert::UUlib $VERSION;
48 51
49Initialize(); 52# dummy function for compatibility with pre-1.7 versions
50 53sub Initialize { }
51# not when < 5.005_6x
52# END { CleanUp() }
53
54for (@_consts) {
55 my $constant = constant($_);
56 *$_ = sub () { $constant };
57}
58 54
59# action code -> string mapping 55# action code -> string mapping
60sub straction($) { 56sub straction($) {
61 return 'copying' if $_[0] == &ACT_COPYING; 57 return 'copying' if $_[0] == &ACT_COPYING;
62 return 'decoding' if $_[0] == &ACT_DECODING; 58 return 'decoding' if $_[0] == &ACT_DECODING;
100 use Convert::UUlib ':all'; 96 use Convert::UUlib ':all';
101 97
102 # read all the files named on the commandline and decode them 98 # read all the files named on the commandline and decode them
103 # into the CURRENT directory. See below for a longer example. 99 # into the CURRENT directory. See below for a longer example.
104 LoadFile $_ for @ARGV; 100 LoadFile $_ for @ARGV;
105 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 101
102 for my $uu (GetFileList) {
106 if ($uu->state & FILE_OK) { 103 if ($uu->state & FILE_OK) {
107 $uu->decode; 104 $uu->decode;
108 print $uu->filename, "\n"; 105 print $uu->filename, "\n";
109 } 106 }
110 } 107 }
154 OPT_TINYB64 detect short B64 outside of Mime 151 OPT_TINYB64 detect short B64 outside of Mime
155 OPT_ENCEXT extension for single-part encoded files 152 OPT_ENCEXT extension for single-part encoded files
156 OPT_REMOVE remove input files after decoding (dangerous) 153 OPT_REMOVE remove input files after decoding (dangerous)
157 OPT_MOREMIME strict MIME adherence 154 OPT_MOREMIME strict MIME adherence
158 OPT_DOTDOT ".."-unescaping has not yet been done on input files 155 OPT_DOTDOT ".."-unescaping has not yet been done on input files
156 OPT_RBUF set default read I/O buffer size in bytes
157 OPT_WBUF set default write I/O buffer size in bytes
158 OPT_AUTOCHECK automatically check file list after every loadfile
159 159
160=head2 Result/Error codes 160=head2 Result/Error codes
161 161
162 RET_OK everything went fine 162 RET_OK everything went fine
163 RET_IOERR I/O Error - examine errno 163 RET_IOERR I/O Error - examine errno
206again. 206again.
207 207
208On my machine, a fairly complete decode with DBI backend needs about 10MB 208On my machine, a fairly complete decode with DBI backend needs about 10MB
209RSS to decode 20000 files. 209RSS to decode 20000 files.
210 210
211=over 4 211=over
212
213=item Initialize
214
215Not normally necessary, (re-)initializes the library.
216 212
217=item CleanUp 213=item CleanUp
218 214
219Not normally necessary, could be called at the end to release memory 215Release memory, file items and clean up files. Should be called after a
220before starting a new decoding round. 216decoding run, if you want to start a new one.
221 217
222=back 218=back
223 219
224=head2 Setting and querying options 220=head2 Setting and querying options
225 221
226=over 4 222=over
227 223
228=item $option = GetOption OPT_xxx 224=item $option = GetOption OPT_xxx
229 225
230=item SetOption OPT_xxx, opt-value 226=item SetOption OPT_xxx, opt-value
231 227
233 229
234See the C<OPT_xxx> constants above to see which options exist. 230See the C<OPT_xxx> constants above to see which options exist.
235 231
236=head2 Setting various callbacks 232=head2 Setting various callbacks
237 233
238=over 4 234=over
239 235
240=item SetMsgCallback [callback-function] 236=item SetMsgCallback [callback-function]
241 237
242=item SetBusyCallback [callback-function] 238=item SetBusyCallback [callback-function]
243 239
247 243
248=back 244=back
249 245
250=head2 Call the currently selected FNameFilter 246=head2 Call the currently selected FNameFilter
251 247
252=over 4 248=over
253 249
254=item $file = FNameFilter $file 250=item $file = FNameFilter $file
255 251
256=back 252=back
257 253
258=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 254=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
259 255
260=over 4 256=over
261 257
262=item ($retval, $count) = LoadFile $fname, [$id, [$delflag]] 258=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
263 259
264Load the given file and scan it for encoded contents. Optionally tag it 260Load the given file and scan it for encoded contents. Optionally tag it
265with the given id, and if C<$delflag> is true, delete the file after it is 261with the given id, and if C<$delflag> is true, delete the file after it
266no longer necessary. 262is no longer necessary. If you are certain of the part number, you can
263specify it as the last argument.
264
265A better (usually faster) way of doing this is using the C<SetFNameFilter>
266functionality.
267 267
268=item $retval = Smerge $pass 268=item $retval = Smerge $pass
269 269
270If you are desperate, try to call C<Smerge> with increasing C<$pass> 270If you are desperate, try to call C<Smerge> with increasing C<$pass>
271values, beginning at C<0>, to try to merge parts that usually would not 271values, beginning at C<0>, to try to merge parts that usually would not
272have been merged. 272have been merged.
273 273
274Most probably this will result in garbled files, so never do this by 274Most probably this will result in garbled files, so never do this by
275default. 275default, except:
276
277If the C<OPT_AUTOCHECK> option has been disabled (by default it is
278enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
279after loading all files as an additional pre-pass (which is normally done
280by C<LoadFile>).
276 281
277=item $item = GetFileListItem $item_number 282=item $item = GetFileListItem $item_number
278 283
279Return the C<$item> structure for the C<$item_number>'th found file, or 284Return the C<$item> structure for the C<$item_number>'th found file, or
280C<undef> if no file with that number exists. 285C<undef> if no file with that number exists.
281 286
282The first file has number C<0>, and the series has no holes, so you can 287The first file has number C<0>, and the series has no holes, so you can
283iterate over all files by starting with zero and incrementing until you 288iterate over all files by starting with zero and incrementing until you
284hit C<undef>. 289hit C<undef>.
285 290
291This function has to walk the linear list of files on each access, so
292if you want to iterate over all items, it is usually faster to use
293C<GetFileList>.
294
295=item @items = GetFileList
296
297Similar to C<GetFileListItem>, but returns all files in one go, which is
298very much faster for large number of items, and has no drawbacks when used
299for a small number of items.
300
286=back 301=back
287 302
288=head2 Decoding files 303=head2 Decoding files
289 304
290=over 4 305=over
291 306
292=item $retval = $item->rename($newname) 307=item $retval = $item->rename ($newname)
293 308
294Change the ondisk filename where the decoded file will be saved. 309Change the ondisk filename where the decoded file will be saved.
295 310
296=item $retval = $item->decode_temp 311=item $retval = $item->decode_temp
297 312
300 315
301=item $retval = $item->remove_temp 316=item $retval = $item->remove_temp
302 317
303Remove the temporarily decoded file again. 318Remove the temporarily decoded file again.
304 319
305=item $retval = $item->decode([$target_path]) 320=item $retval = $item->decode ([$target_path])
306 321
307Decode the file to it's destination, or the given target path. 322Decode the file to its destination, or the given target path.
308 323
309=item $retval = $item->info(callback-function) 324=item $retval = $item->info (callback-function)
310 325
311=back 326=back
312 327
313=head2 Querying (and setting) item attributes 328=head2 Querying (and setting) item attributes
314 329
315=over 4 330=over
316 331
317=item $state = $item->state 332=item $state = $item->state
318 333
319=item $mode = $item->mode([newmode]) 334=item $mode = $item->mode ([newmode])
320 335
321=item $uudet = $item->uudet 336=item $uudet = $item->uudet
322 337
323=item $size = $item->size 338=item $size = $item->size
324 339
325=item $filename = $item->filename([newfilename]) 340=item $filename = $item->filename ([newfilename])
326 341
327=item $subfname = $item->subfname 342=item $subfname = $item->subfname
328 343
329=item $mimeid = $item->mimeid 344=item $mimeid = $item->mimeid
330 345
334 349
335=back 350=back
336 351
337=head2 Information about source parts 352=head2 Information about source parts
338 353
339=over 4 354=over
340 355
341=item $parts = $item->parts 356=item $parts = $item->parts
342 357
343Return information about all parts (source files) used to decode the file 358Return information about all parts (source files) used to decode the file
344as a list of hashrefs with the following structure: 359as a list of hashrefs with the following structure:
358Usually you are interested mostly in the C<sfname> and possibly the C<partno> 373Usually you are interested mostly in the C<sfname> and possibly the C<partno>
359and C<filename> members. 374and C<filename> members.
360 375
361=back 376=back
362 377
363=head2 Functions below not documented and not very well tested 378=head2 Functions below are not documented and not very well tested - feedback welcome
364 379
365 QuickDecode 380 QuickDecode
366 EncodeMulti 381 EncodeMulti
367 EncodePartial 382 EncodePartial
368 EncodeToStream 383 EncodeToStream
372 387
373=head2 EXTENSION FUNCTIONS 388=head2 EXTENSION FUNCTIONS
374 389
375Functions found in this module but not documented in the uulib documentation: 390Functions found in this module but not documented in the uulib documentation:
376 391
377=over 4 392=over
378 393
379=item $msg = straction ACT_xxx 394=item $msg = straction ACT_xxx
380 395
381Return a human readable string representing the given action code. 396Return a human readable string representing the given action code.
382 397
423 438
424=back 439=back
425 440
426=head1 LARGE EXAMPLE DECODER 441=head1 LARGE EXAMPLE DECODER
427 442
443The general workflow for decoding is like this:
444
445=over
446
447=item 1. Configure options with C<SetOption> or C<SetXXXCallback>.
448
449=item 2. Load all source files with C<LoadFile>.
450
451=item 3. Optionally C<Smerge>.
452
453=item 4. Iterate over all C<GetFileList> items (i.e. result files).
454
455=item 5. C<CleanUp> to delete files and free items.
456
457=back
458
428This is the file C<example-decoder> from the distribution, put here 459What follows is the file C<example-decoder> from the distribution that
429instead of more thorough documentation. 460illustrates the above workflow in a non-trivial example.
430 461
462 #!/usr/bin/perl
463
431 # decode all the files in the directory uusrc/ and copy 464 # decode all the files in the directory uusrc/ and copy
432 # the resulting files to uudst/ 465 # the resulting files to uudst/
433 466
434 use Convert::UUlib ':all'; 467 use Convert::UUlib ':all';
435 468
436 sub namefilter { 469 sub namefilter {
437 my($path)=@_; 470 my ($path) = @_;
471
438 $path=~s/^.*[\/\\]//; 472 $path=~s/^.*[\/\\]//;
473
439 $path; 474 $path
440 } 475 }
441 476
442 sub busycb { 477 sub busycb {
443 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 478 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
444 $_[0]=straction($action); 479 $_[0]=straction($action);
445 print "busy_callback(", (join ",",@_), ")\n"; 480 print "busy_callback(", (join ",",@_), ")\n";
446 0; 481 0
447 } 482 }
448 483
484 SetOption OPT_RBUF, 128*1024;
485 SetOption OPT_WBUF, 1024*1024;
449 SetOption OPT_IGNMODE, 1; 486 SetOption OPT_IGNMODE, 1;
487 SetOption OPT_IGNMODE, 1;
450 SetOption OPT_VERBOSE, 1; 488 SetOption OPT_VERBOSE, 1;
489 SetOption OPT_AUTOCHECK, 0;
451 490
452 # show the three ways you can set callback functions. I normally 491 # show the three ways you can set callback functions. I normally
453 # prefer the one with the sub inplace. 492 # prefer the one with the sub inplace.
454 SetFNameFilter \&namefilter; 493 SetFNameFilter \&namefilter;
455 494
456 SetBusyCallback "busycb", 333; 495 SetBusyCallback "busycb", 333;
457 496
458 SetMsgCallback sub { 497 SetMsgCallback sub {
459 my ($msg, $level) = @_; 498 my ($msg, $level) = @_;
460 print uc strmsglevel $_[1], ": $msg\n"; 499 print uc strmsglevel $_[1], ": $msg\n";
461 }; 500 };
462 501
463 # the following non-trivial FileNameCallback takes care 502 # the following non-trivial FileNameCallback takes care
464 # of some subject lines not detected properly by uulib: 503 # of some subject lines not detected properly by uulib:
465 SetFileNameCallback sub { 504 SetFileNameCallback sub {
466 return unless $_[1]; # skip "Re:"-plies et al. 505 return unless $_[1]; # skip "Re:"-plies et al.
467 local $_ = $_[0]; 506 local $_ = $_[0];
468 507
469 # the following rules are rather effective on some newsgroups, 508 # the following rules are rather effective on some newsgroups,
470 # like alt.binaries.games.anime, where non-mime, uuencoded data 509 # like alt.binaries.games.anime, where non-mime, uuencoded data
471 # is very common 510 # is very common
472 511
473 # if we find some *.rar, take it as the filename 512 # if we find some *.rar, take it as the filename
474 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 513 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
475 514
476 # one common subject format 515 # one common subject format
477 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 516 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
478 517
479 # - filename.par (04/55) 518 # - filename.par (04/55)
480 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 519 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
481 520
482 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 521 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
483 # - (20 files) No.17 Roseanne.jpg [2/2] 522 # - (20 files) No.17 Roseanne.jpg [2/2]
484 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 523 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
485 524
525 # try to detect some common forms of filenames
526 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
527
486 # otherwise just pass what we have 528 # otherwise just pass what we have
487 return (); 529 ()
488 }; 530 };
489 531
490 # now read all files in the directory uusrc/* 532 # now read all files in the directory uusrc/*
491 for(<uusrc/*>) { 533 for (<uusrc/*>) {
492 my($retval,$count)=LoadFile ($_, $_, 1); 534 my ($retval, $count) = LoadFile ($_, $_, 1);
493 print "file($_), status(", strerror $retval, ") parts($count)\n"; 535 print "file($_), status(", strerror $retval, ") parts($count)\n";
494 } 536 }
495 537
538 Smerge -1;
539
496 SetOption OPT_SAVEPATH, "uudst/"; 540 SetOption OPT_SAVEPATH, "uudst/";
497 541
498 # now wade through all files and their source parts 542 # now wade through all files and their source parts
499 $i = 0; 543 for my $uu (GetFileList) {
500 while ($uu = GetFileListItem($i)) { 544 print "file ", $uu->filename, "\n";
501 $i++;
502 print "file nr. $i";
503 print " state ", $uu->state; 545 print " state ", $uu->state, "\n";
504 print " mode ", $uu->mode; 546 print " mode ", $uu->mode, "\n";
505 print " uudet ", strencoding $uu->uudet; 547 print " uudet ", strencoding $uu->uudet, "\n";
506 print " size ", $uu->size; 548 print " size ", $uu->size, "\n";
507 print " filename ", $uu->filename;
508 print " subfname ", $uu->subfname; 549 print " subfname ", $uu->subfname, "\n";
509 print " mimeid ", $uu->mimeid; 550 print " mimeid ", $uu->mimeid, "\n";
510 print " mimetype ", $uu->mimetype; 551 print " mimetype ", $uu->mimetype, "\n";
511 print "\n";
512 552
513 # print additional info about all parts 553 # print additional info about all parts
554 print " parts";
514 for ($uu->parts) { 555 for ($uu->parts) {
515 while (my ($k, $v) = each %$_) { 556 for my $k (sort keys %$_) {
516 print "$k > $v, "; 557 print " $k=$_->{$k}";
517 } 558 }
518 print "\n"; 559 print "\n";
519 } 560 }
520 561
521 $uu->decode_temp;
522 print " temporarily decoded to ", $uu->binfile, "\n";
523 $uu->remove_temp; 562 $uu->remove_temp;
524 563
525 print strerror $uu->decode; 564 if (my $err = $uu->decode) {
565 print " ERROR ", strerror $err, "\n";
566 } else {
526 print " saved as uudst/", $uu->filename, "\n"; 567 print " successfully saved as uudst/", $uu->filename, "\n";
527 } 568 }
569 }
528 570
529 print "cleanup...\n"; 571 print "cleanup...\n";
530 572
531 CleanUp(); 573 CleanUp;
574
575=head1 PERLMULTICORE SUPPORT
576
577This module supports the perlmulticore standard (see
578L<http://perlmulticore.schmorp.de/> for more info) for the following
579functions - generally these are functions accessing the disk and/or using
580considerable CPU time:
581
582 LoadFile
583 $item->decode
584 $item->decode_temp
585 $item->remove_temp
586 $item->info
587
588The perl interpreter will be reacquired/released on every callback
589invocation, so for performance reasons, callbacks should be avoided if
590that is costly.
591
592Future versions might enable multicore support for more functions.
593
594=head1 BUGS AND LIMITATIONS
595
596The original uulib library this module uses was written at a time where
597main memory was measured in megabytes and buffer overflows as a security
598thing didn't exist. While a lot of security fixes have been applied over
599the years (including some defense in depth mechanisms that can shield
600against a lot of as-of-yet undetected bugs), using this library for
601security purposes requires care.
602
603Likewise, file sizes when the uulib library was written were tiny compared
604to today, so do not expect this library to handle files larger than 2GB.
605
606Lastly, this module uses a very "C-like" interface, which means it doesn't
607protect you from invalid pointers as you might expect from "more perlish"
608modules - for example, accessing a file item object after calling
609C<CleanUp> will likely result in crashes, memory corruption, or worse.
532 610
533=head1 AUTHOR 611=head1 AUTHOR
534 612
535Marc Lehmann <pcg@goof.com>, the original uulib library was written 613Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
536by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 614by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
537bugfixed by Marc Lehmann. 615bugfixed by Marc Lehmann.
538 616
539=head1 SEE ALSO 617=head1 SEE ALSO
540 618
541perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 619perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
542 620
543=cut 621=cut
622

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines