ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.13 by root, Tue Oct 15 23:20:29 2002 UTC vs.
Revision 1.47 by root, Fri Feb 28 16:57:25 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3no warnings;
4use strict;
2 5
3use Carp; 6use Carp;
4 7
5require Exporter; 8require Exporter;
6require DynaLoader; 9require DynaLoader;
7 10
8$VERSION = 0.31; 11our $VERSION = 1.62;
9 12
10@ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
11 14
12@_consts = qw( 15our @_consts = qw(
13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14 17
15 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 18 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 19 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17 20
18 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 21 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
19 22
23 OPT_RBUF OPT_WBUF
20 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
21 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
22 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
23 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
24 28
25 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
26 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
27 31
28 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
29 XX_ENCODED UU_ENCODED YENC_ENCODED 33 XX_ENCODED UU_ENCODED YENC_ENCODED
30); 34);
31 35
32@_funcs = qw( 36our @_funcs = qw(
33 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
34 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
35 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
36 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
37 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
38 E_PrepPartial 42 E_PrepPartial
39 43
40 straction strencoding strmsglevel 44 straction strencoding strmsglevel
41); 45);
42 46
43@EXPORT = @_consts; 47our @EXPORT = @_consts;
44@EXPORT_OK = @_funcs; 48our @EXPORT_OK = @_funcs;
45%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
46 50
47bootstrap Convert::UUlib $VERSION; 51bootstrap Convert::UUlib $VERSION;
48 52
49Initialize(); 53Initialize();
50 54
51# not when < 5.005_6x 55# not when < 5.005_6x
52# END { CleanUp() } 56# END { CleanUp() }
53 57
54for (@_consts) { 58for (@_consts) {
55 my $constant = constant($_); 59 my $constant = constant($_);
60 no strict 'refs';
56 *$_ = sub () { $constant }; 61 *$_ = sub () { $constant };
57} 62}
58 63
59# action code -> string mapping 64# action code -> string mapping
60sub straction($) { 65sub straction($) {
100 use Convert::UUlib ':all'; 105 use Convert::UUlib ':all';
101 106
102 # read all the files named on the commandline and decode them 107 # read all the files named on the commandline and decode them
103 # into the CURRENT directory. See below for a longer example. 108 # into the CURRENT directory. See below for a longer example.
104 LoadFile $_ for @ARGV; 109 LoadFile $_ for @ARGV;
105 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 110 for my $uu (GetFileList) {
106 if ($uu->state & FILE_OK) { 111 if ($uu->state & FILE_OK) {
107 $uu->decode; 112 $uu->decode;
108 print $uu->filename, "\n"; 113 print $uu->filename, "\n";
109 } 114 }
110 } 115 }
151 OPT_PROGRESS retrieve progress information 156 OPT_PROGRESS retrieve progress information
152 OPT_USETEXT handle text messages 157 OPT_USETEXT handle text messages
153 OPT_PREAMB handle Mime preambles/epilogues 158 OPT_PREAMB handle Mime preambles/epilogues
154 OPT_TINYB64 detect short B64 outside of Mime 159 OPT_TINYB64 detect short B64 outside of Mime
155 OPT_ENCEXT extension for single-part encoded files 160 OPT_ENCEXT extension for single-part encoded files
156 OPT_REMOVE remove input files after decoding 161 OPT_REMOVE remove input files after decoding (dangerous)
157 OPT_MOREMIME strict MIME adherence 162 OPT_MOREMIME strict MIME adherence
158 OPT_DOTDOT .. unescaping has not yet been done on input files 163 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes
166 OPT_AUTOCHECK automatically check file list after every loadfile
159 167
160=head2 Result/Error codes 168=head2 Result/Error codes
161 169
162 RET_OK everything went fine 170 RET_OK everything went fine
163 RET_IOERR I/O Error - examine errno 171 RET_IOERR I/O Error - examine errno
203 211
204Initialize is automatically called when the module is loaded and allocates 212Initialize is automatically called when the module is loaded and allocates
205quite a small amount of memory for todays machines ;) CleanUp releases that 213quite a small amount of memory for todays machines ;) CleanUp releases that
206again. 214again.
207 215
216On my machine, a fairly complete decode with DBI backend needs about 10MB
217RSS to decode 20000 files.
218
208=over 4 219=over
209 220
210=item Initialize 221=item Initialize
211 222
212Not normally necessary, (re-)initializes the library. 223Not normally necessary, (re-)initializes the library.
213 224
218 229
219=back 230=back
220 231
221=head2 Setting and querying options 232=head2 Setting and querying options
222 233
223=over 4 234=over
224 235
225=item $option = GetOption OPT_xxx 236=item $option = GetOption OPT_xxx
226 237
227=item SetOption OPT_xxx, opt-value 238=item SetOption OPT_xxx, opt-value
228 239
230 241
231See the C<OPT_xxx> constants above to see which options exist. 242See the C<OPT_xxx> constants above to see which options exist.
232 243
233=head2 Setting various callbacks 244=head2 Setting various callbacks
234 245
235=over 4 246=over
236 247
237=item SetMsgCallback [callback-function] 248=item SetMsgCallback [callback-function]
238 249
239=item SetBusyCallback [callback-function] 250=item SetBusyCallback [callback-function]
240 251
244 255
245=back 256=back
246 257
247=head2 Call the currently selected FNameFilter 258=head2 Call the currently selected FNameFilter
248 259
249=over 4 260=over
250 261
251=item $file = FNameFilter $file 262=item $file = FNameFilter $file
252 263
253=back 264=back
254 265
255=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 266=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
256 267
257=over 4 268=over
258 269
259=item ($retval, $count) = LoadFile $fname, [$id, [$delflag]] 270=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
260 271
261Load the given file and scan it for encoded contents. Optionally tag it 272Load the given file and scan it for encoded contents. Optionally tag it
262with the given id, and if C<$delflag> is true, delete the file after it is 273with the given id, and if C<$delflag> is true, delete the file after it
263no longer necessary. 274is no longer necessary. If you are certain of the part number, you can
275specify it as the last argument.
276
277A better (usually faster) way of doing this is using the C<SetFNameFilter>
278functionality.
264 279
265=item $retval = Smerge $pass 280=item $retval = Smerge $pass
266 281
267If you are desperate, try to call C<Smerge> with increasing C<$pass> 282If you are desperate, try to call C<Smerge> with increasing C<$pass>
268values, beginning at C<0>, to try to merge parts that usually would not 283values, beginning at C<0>, to try to merge parts that usually would not
269have been merged. 284have been merged.
270 285
271Most probably this will result in garbled files, so never do this by 286Most probably this will result in garbled files, so never do this by
272default. 287default, except:
288
289If the C<OPT_AUTOCHECK> option has been disabled (by default it is
290enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
291after loading all files as an additional pre-pass (which is normally done
292by C<LoadFile>).
273 293
274=item $item = GetFileListItem $item_number 294=item $item = GetFileListItem $item_number
275 295
276Return the C<$item> structure for the C<$item_number>'th found file, or 296Return the C<$item> structure for the C<$item_number>'th found file, or
277C<undef> of no file with that number exists. 297C<undef> of no file with that number exists.
278 298
279The first file has number C<0>, and the series has no holes, so you can 299The first file has number C<0>, and the series has no holes, so you can
280iterate over all files by starting with zero and incrementing until you 300iterate over all files by starting with zero and incrementing until you
281hit C<undef>. 301hit C<undef>.
282 302
303This function has to walk the linear list of files on each access, so
304if you want to iterate over all items, it is usually faster to use
305C<GetFileList>.
306
307=item @items = GetFileList
308
309Similar to C<GetFileListItem>, but returns all files in one go.
310
283=back 311=back
284 312
285=head2 Decoding files 313=head2 Decoding files
286 314
287=over 4 315=over
288 316
289=item $retval = $item->rename($newname) 317=item $retval = $item->rename ($newname)
290 318
291Change the ondisk filename where the decoded file will be saved. 319Change the ondisk filename where the decoded file will be saved.
292 320
293=item $retval = $item->decode_temp 321=item $retval = $item->decode_temp
294 322
297 325
298=item $retval = $item->remove_temp 326=item $retval = $item->remove_temp
299 327
300Remove the temporarily decoded file again. 328Remove the temporarily decoded file again.
301 329
302=item $retval = $item->decode([$target_path]) 330=item $retval = $item->decode ([$target_path])
303 331
304Decode the file to it's destination, or the given target path. 332Decode the file to its destination, or the given target path.
305 333
306=item $retval = $item->info(callback-function) 334=item $retval = $item->info (callback-function)
307 335
308=back 336=back
309 337
310=head2 Querying (and setting) item attributes 338=head2 Querying (and setting) item attributes
311 339
312=over 4 340=over
313 341
314=item $state = $item->state 342=item $state = $item->state
315 343
316=item $mode = $item->mode([newmode]) 344=item $mode = $item->mode ([newmode])
317 345
318=item $uudet = $item->uudet 346=item $uudet = $item->uudet
319 347
320=item $size = $item->size 348=item $size = $item->size
321 349
322=item $filename = $item->filename([newfilename}) 350=item $filename = $item->filename ([newfilename})
323 351
324=item $subfname = $item->subfname 352=item $subfname = $item->subfname
325 353
326=item $mimeid = $item->mimeid 354=item $mimeid = $item->mimeid
327 355
331 359
332=back 360=back
333 361
334=head2 Information about source parts 362=head2 Information about source parts
335 363
336=over 4 364=over
337 365
338=item $parts = $item->parts 366=item $parts = $item->parts
339 367
340Return information about all parts (source files) used to decode the file 368Return information about all parts (source files) used to decode the file
341as a list of hashrefs with the following structure: 369as a list of hashrefs with the following structure:
355Usually you are interested mostly the C<sfname> and possibly the C<partno> 383Usually you are interested mostly the C<sfname> and possibly the C<partno>
356and C<filename> members. 384and C<filename> members.
357 385
358=back 386=back
359 387
360=head2 Functions below not documented and not very well tested 388=head2 Functions below are not documented and not very well tested - feedback welcome
361 389
362 QuickDecode 390 QuickDecode
363 EncodeMulti 391 EncodeMulti
364 EncodePartial 392 EncodePartial
365 EncodeToStream 393 EncodeToStream
369 397
370=head2 EXTENSION FUNCTIONS 398=head2 EXTENSION FUNCTIONS
371 399
372Functions found in this module but not documented in the uulib documentation: 400Functions found in this module but not documented in the uulib documentation:
373 401
374=over 4 402=over
375 403
376=item $msg = straction ACT_xxx 404=item $msg = straction ACT_xxx
377 405
378Return a human readable string representing the given action code. 406Return a human readable string representing the given action code.
379 407
423=head1 LARGE EXAMPLE DECODER 451=head1 LARGE EXAMPLE DECODER
424 452
425This is the file C<example-decoder> from the distribution, put here 453This is the file C<example-decoder> from the distribution, put here
426instead of more thorough documentation. 454instead of more thorough documentation.
427 455
456 #!/usr/bin/perl
457
428 # decode all the files in the directory uusrc/ and copy 458 # decode all the files in the directory uusrc/ and copy
429 # the resulting files to uudst/ 459 # the resulting files to uudst/
430 460
431 use Convert::UUlib ':all'; 461 use Convert::UUlib ':all';
432 462
433 sub namefilter { 463 sub namefilter {
434 my($path)=@_; 464 my ($path) = @_;
465
435 $path=~s/^.*[\/\\]//; 466 $path=~s/^.*[\/\\]//;
467
436 $path; 468 $path
437 } 469 }
438 470
439 sub busycb { 471 sub busycb {
440 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 472 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
441 $_[0]=straction($action); 473 $_[0]=straction($action);
442 print "busy_callback(", (join ",",@_), ")\n"; 474 print "busy_callback(", (join ",",@_), ")\n";
443 0; 475 0
444 } 476 }
445 477
478 SetOption OPT_RBUF, 128*1024;
479 SetOption OPT_WBUF, 1024*1024;
446 SetOption OPT_IGNMODE, 1; 480 SetOption OPT_IGNMODE, 1;
481 SetOption OPT_IGNMODE, 1;
447 SetOption OPT_VERBOSE, 1; 482 SetOption OPT_VERBOSE, 1;
448 483
449 # show the three ways you can set callback functions. I normally 484 # show the three ways you can set callback functions. I normally
450 # prefer the one with the sub inplace. 485 # prefer the one with the sub inplace.
451 SetFNameFilter \&namefilter; 486 SetFNameFilter \&namefilter;
452 487
453 SetBusyCallback "busycb", 333; 488 SetBusyCallback "busycb", 333;
454 489
455 SetMsgCallback sub { 490 SetMsgCallback sub {
456 my ($msg, $level) = @_; 491 my ($msg, $level) = @_;
457 print uc strmsglevel $_[1], ": $msg\n"; 492 print uc strmsglevel $_[1], ": $msg\n";
458 }; 493 };
459 494
460 # the following non-trivial FileNameCallback takes care 495 # the following non-trivial FileNameCallback takes care
461 # of some subject lines not detected properly by uulib: 496 # of some subject lines not detected properly by uulib:
462 SetFileNameCallback sub { 497 SetFileNameCallback sub {
463 return unless $_[1]; # skip "Re:"-plies et al. 498 return unless $_[1]; # skip "Re:"-plies et al.
464 local $_ = $_[0]; 499 local $_ = $_[0];
465 500
466 # the following rules are rather effective on some newsgroups, 501 # the following rules are rather effective on some newsgroups,
467 # like alt.binaries.games.anime, where non-mime, uuencoded data 502 # like alt.binaries.games.anime, where non-mime, uuencoded data
468 # is very common 503 # is very common
469 504
470 # if we find some *.rar, take it as the filename 505 # if we find some *.rar, take it as the filename
471 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 506 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
472 507
473 # one common subject format 508 # one common subject format
474 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 509 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
475 510
476 # - filename.par (04/55) 511 # - filename.par (04/55)
477 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 512 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
478 513
479 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 514 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
480 # - (20 files) No.17 Roseanne.jpg [2/2] 515 # - (20 files) No.17 Roseanne.jpg [2/2]
481 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 516 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
482 517
518 # try to detect some common forms of filenames
519 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
520
483 # otherwise just pass what we have 521 # otherwise just pass what we have
484 return (); 522 ()
485 }; 523 };
486 524
487 # now read all files in the directory uusrc/* 525 # now read all files in the directory uusrc/*
488 for(<uusrc/*>) { 526 for (<uusrc/*>) {
489 my($retval,$count)=LoadFile ($_, $_, 1); 527 my ($retval, $count) = LoadFile ($_, $_, 1);
490 print "file($_), status(", strerror $retval, ") parts($count)\n"; 528 print "file($_), status(", strerror $retval, ") parts($count)\n";
491 } 529 }
492 530
493 SetOption OPT_SAVEPATH, "uudst/"; 531 SetOption OPT_SAVEPATH, "uudst/";
494 532
495 # now wade through all files and their source parts 533 # now wade through all files and their source parts
496 $i = 0; 534 for my $uu (GetFileList) {
497 while ($uu = GetFileListItem($i)) { 535 print "file ", $uu->filename, "\n";
498 $i++;
499 print "file nr. $i";
500 print " state ", $uu->state; 536 print " state ", $uu->state, "\n";
501 print " mode ", $uu->mode; 537 print " mode ", $uu->mode, "\n";
502 print " uudet ", strencoding $uu->uudet; 538 print " uudet ", strencoding $uu->uudet, "\n";
503 print " size ", $uu->size; 539 print " size ", $uu->size, "\n";
504 print " filename ", $uu->filename;
505 print " subfname ", $uu->subfname; 540 print " subfname ", $uu->subfname, "\n";
506 print " mimeid ", $uu->mimeid; 541 print " mimeid ", $uu->mimeid, "\n";
507 print " mimetype ", $uu->mimetype; 542 print " mimetype ", $uu->mimetype, "\n";
508 print "\n";
509 543
510 # print additional info about all parts 544 # print additional info about all parts
545 print " parts";
511 for ($uu->parts) { 546 for ($uu->parts) {
512 while (my ($k, $v) = each %$_) { 547 for my $k (sort keys %$_) {
513 print "$k > $v, "; 548 print " $k=$_->{$k}";
514 } 549 }
515 print "\n"; 550 print "\n";
516 } 551 }
517 552
518 $uu->decode_temp;
519 print " temporarily decoded to ", $uu->binfile, "\n";
520 $uu->remove_temp; 553 $uu->remove_temp;
521 554
522 print strerror $uu->decode; 555 if (my $err = $uu->decode) {
556 print " ERROR ", strerror $err, "\n";
557 } else {
523 print " saved as uudst/", $uu->filename, "\n"; 558 print " successfully saved as uudst/", $uu->filename, "\n";
524 } 559 }
560 }
525 561
526 print "cleanup...\n"; 562 print "cleanup...\n";
527 563
528 CleanUp(); 564 CleanUp;
565
566=head1 PERLMULTICORE SUPPORT
567
568This module supports the perlmulticore standard (see
569L<http://perlmulticore.schmorp.de/> for more info) for the following
570functions - generally these are functions accessing the disk and/or using
571considerable CPU time:
572
573 LoadFile
574 $item->decode
575 $item->decode_temp
576 $item->remove_temp
577 $item->info
578
579The perl interpreter will be reacquired/released on every callback
580invocation, so for performance reasons, callbacks should be avoided if
581that is costly.
582
583Future versions might enable multicore support for more functions.
584
585=head1 BUGS AND LIMITATIONS
586
587The original uulib library this module uses was written at a time when
588main memory was measured in megabytes and buffer overflows as a security
589issue didn't exist. While a lot of security fixes have been applied over
590the years (including some defense-in-depth mechanisms that can shield
591against a lot of as-of-yet undetected bugs), using this library for
592security purposes requires care.
593
594Likewise, file sizes when the uulib library was written were tiny compared
595to today, so do not expect this library to handle files larger than 2GB.
529 596
530=head1 AUTHOR 597=head1 AUTHOR
531 598
532Marc Lehmann <pcg@goof.com>, the original uulib library was written 599Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
533by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 600by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
534bugfixed by Marc Lehmann. 601bugfixed by Marc Lehmann.
535 602
536=head1 SEE ALSO 603=head1 SEE ALSO
537 604
538perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 605perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
539 606
540=cut 607=cut
608

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines