ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.30 by root, Fri Jun 13 13:27:51 2008 UTC vs.
Revision 1.47 by root, Fri Feb 28 16:57:25 2020 UTC

6use Carp; 6use Carp;
7 7
8require Exporter; 8require Exporter;
9require DynaLoader; 9require DynaLoader;
10 10
11our $VERSION = '1.10'; 11our $VERSION = 1.62;
12 12
13our @ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
14 14
15our @_consts = qw( 15our @_consts = qw(
16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
22 22
23 OPT_RBUF OPT_WBUF 23 OPT_RBUF OPT_WBUF
24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
28 28
29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
31 31
32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
34); 34);
35 35
36our @_funcs = qw( 36our @_funcs = qw(
37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
39 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
42 E_PrepPartial 42 E_PrepPartial
43 43
44 straction strencoding strmsglevel 44 straction strencoding strmsglevel
105 use Convert::UUlib ':all'; 105 use Convert::UUlib ':all';
106 106
107 # read all the files named on the commandline and decode them 107 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example. 108 # into the CURRENT directory. See below for a longer example.
109 LoadFile $_ for @ARGV; 109 LoadFile $_ for @ARGV;
110 for (my $i = 0; my $uu = GetFileListItem $i; $i++) { 110 for my $uu (GetFileList) {
111 if ($uu->state & FILE_OK) { 111 if ($uu->state & FILE_OK) {
112 $uu->decode; 112 $uu->decode;
113 print $uu->filename, "\n"; 113 print $uu->filename, "\n";
114 } 114 }
115 } 115 }
159 OPT_TINYB64 detect short B64 outside of Mime 159 OPT_TINYB64 detect short B64 outside of Mime
160 OPT_ENCEXT extension for single-part encoded files 160 OPT_ENCEXT extension for single-part encoded files
161 OPT_REMOVE remove input files after decoding (dangerous) 161 OPT_REMOVE remove input files after decoding (dangerous)
162 OPT_MOREMIME strict MIME adherence 162 OPT_MOREMIME strict MIME adherence
163 OPT_DOTDOT ".."-unescaping has not yet been done on input files 163 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL* 164 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL* 165 OPT_WBUF set default write I/O buffer size in bytes
166 OPT_AUTOCHECK automatically check file list after every loadfile
166 167
167=head2 Result/Error codes 168=head2 Result/Error codes
168 169
169 RET_OK everything went fine 170 RET_OK everything went fine
170 RET_IOERR I/O Error - examine errno 171 RET_IOERR I/O Error - examine errno
213again. 214again.
214 215
215On my machine, a fairly complete decode with DBI backend needs about 10MB 216On my machine, a fairly complete decode with DBI backend needs about 10MB
216RSS to decode 20000 files. 217RSS to decode 20000 files.
217 218
218=over 4 219=over
219 220
220=item Initialize 221=item Initialize
221 222
222Not normally necessary, (re-)initializes the library. 223Not normally necessary, (re-)initializes the library.
223 224
228 229
229=back 230=back
230 231
231=head2 Setting and querying options 232=head2 Setting and querying options
232 233
233=over 4 234=over
234 235
235=item $option = GetOption OPT_xxx 236=item $option = GetOption OPT_xxx
236 237
237=item SetOption OPT_xxx, opt-value 238=item SetOption OPT_xxx, opt-value
238 239
240 241
241See the C<OPT_xxx> constants above to see which options exist. 242See the C<OPT_xxx> constants above to see which options exist.
242 243
243=head2 Setting various callbacks 244=head2 Setting various callbacks
244 245
245=over 4 246=over
246 247
247=item SetMsgCallback [callback-function] 248=item SetMsgCallback [callback-function]
248 249
249=item SetBusyCallback [callback-function] 250=item SetBusyCallback [callback-function]
250 251
254 255
255=back 256=back
256 257
257=head2 Call the currently selected FNameFilter 258=head2 Call the currently selected FNameFilter
258 259
259=over 4 260=over
260 261
261=item $file = FNameFilter $file 262=item $file = FNameFilter $file
262 263
263=back 264=back
264 265
265=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 266=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
266 267
267=over 4 268=over
268 269
269=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] 270=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
270 271
271Load the given file and scan it for encoded contents. Optionally tag it 272Load the given file and scan it for encoded contents. Optionally tag it
272with the given id, and if C<$delflag> is true, delete the file after it 273with the given id, and if C<$delflag> is true, delete the file after it
281If you are desperate, try to call C<Smerge> with increasing C<$pass> 282If you are desperate, try to call C<Smerge> with increasing C<$pass>
282values, beginning at C<0>, to try to merge parts that usually would not 283values, beginning at C<0>, to try to merge parts that usually would not
283have been merged. 284have been merged.
284 285
285Most probably this will result in garbled files, so never do this by 286Most probably this will result in garbled files, so never do this by
286default. 287default, except:
288
289If the C<OPT_AUTOCHECK> option has been disabled (by default it is
290enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
291after loading all files as an additional pre-pass (which is normally done
292by C<LoadFile>).
287 293
288=item $item = GetFileListItem $item_number 294=item $item = GetFileListItem $item_number
289 295
290Return the C<$item> structure for the C<$item_number>'th found file, or 296Return the C<$item> structure for the C<$item_number>'th found file, or
291C<undef> if no file with that number exists. 297C<undef> if no file with that number exists.
292 298
293The first file has number C<0>, and the series has no holes, so you can 299The first file has number C<0>, and the series has no holes, so you can
294iterate over all files by starting with zero and incrementing until you 300iterate over all files by starting with zero and incrementing until you
295hit C<undef>. 301hit C<undef>.
296 302
303This function has to walk the linear list of files on each access, so
304if you want to iterate over all items, it is usually faster to use
305C<GetFileList>.
306
307=item @items = GetFileList
308
309Similar to C<GetFileListItem>, but returns all files in one go.
310
297=back 311=back
298 312
299=head2 Decoding files 313=head2 Decoding files
300 314
301=over 4 315=over
302 316
303=item $retval = $item->rename($newname) 317=item $retval = $item->rename ($newname)
304 318
305Change the ondisk filename where the decoded file will be saved. 319Change the ondisk filename where the decoded file will be saved.
306 320
307=item $retval = $item->decode_temp 321=item $retval = $item->decode_temp
308 322
311 325
312=item $retval = $item->remove_temp 326=item $retval = $item->remove_temp
313 327
314Remove the temporarily decoded file again. 328Remove the temporarily decoded file again.
315 329
316=item $retval = $item->decode([$target_path]) 330=item $retval = $item->decode ([$target_path])
317 331
318Decode the file to it's destination, or the given target path. 332Decode the file to its destination, or the given target path.
319 333
320=item $retval = $item->info(callback-function) 334=item $retval = $item->info (callback-function)
321 335
322=back 336=back
323 337
324=head2 Querying (and setting) item attributes 338=head2 Querying (and setting) item attributes
325 339
326=over 4 340=over
327 341
328=item $state = $item->state 342=item $state = $item->state
329 343
330=item $mode = $item->mode([newmode]) 344=item $mode = $item->mode ([newmode])
331 345
332=item $uudet = $item->uudet 346=item $uudet = $item->uudet
333 347
334=item $size = $item->size 348=item $size = $item->size
335 349
336=item $filename = $item->filename([newfilename]) 350=item $filename = $item->filename ([newfilename])
337 351
338=item $subfname = $item->subfname 352=item $subfname = $item->subfname
339 353
340=item $mimeid = $item->mimeid 354=item $mimeid = $item->mimeid
341 355
345 359
346=back 360=back
347 361
348=head2 Information about source parts 362=head2 Information about source parts
349 363
350=over 4 364=over
351 365
352=item $parts = $item->parts 366=item $parts = $item->parts
353 367
354Return information about all parts (source files) used to decode the file 368Return information about all parts (source files) used to decode the file
355as a list of hashrefs with the following structure: 369as a list of hashrefs with the following structure:
369Usually you are interested mostly in the C<sfname> and possibly the C<partno> 383Usually you are interested mostly in the C<sfname> and possibly the C<partno>
370and C<filename> members. 384and C<filename> members.
371 385
372=back 386=back
373 387
374=head2 Functions below not documented and not very well tested 388=head2 Functions below are not documented and not very well tested - feedback welcome
375 389
376 QuickDecode 390 QuickDecode
377 EncodeMulti 391 EncodeMulti
378 EncodePartial 392 EncodePartial
379 EncodeToStream 393 EncodeToStream
383 397
384=head2 EXTENSION FUNCTIONS 398=head2 EXTENSION FUNCTIONS
385 399
386Functions found in this module but not documented in the uulib documentation: 400Functions found in this module but not documented in the uulib documentation:
387 401
388=over 4 402=over
389 403
390=item $msg = straction ACT_xxx 404=item $msg = straction ACT_xxx
391 405
392Return a human readable string representing the given action code. 406Return a human readable string representing the given action code.
393 407
437=head1 LARGE EXAMPLE DECODER 451=head1 LARGE EXAMPLE DECODER
438 452
439This is the file C<example-decoder> from the distribution, put here 453This is the file C<example-decoder> from the distribution, put here
440instead of more thorough documentation. 454instead of more thorough documentation.
441 455
456 #!/usr/bin/perl
457
442 # decode all the files in the directory uusrc/ and copy 458 # decode all the files in the directory uusrc/ and copy
443 # the resulting files to uudst/ 459 # the resulting files to uudst/
444 460
445 use Convert::UUlib ':all'; 461 use Convert::UUlib ':all';
446 462
447 sub namefilter { 463 sub namefilter {
448 my($path)=@_; 464 my ($path) = @_;
465
449 $path=~s/^.*[\/\\]//; 466 $path=~s/^.*[\/\\]//;
467
450 $path; 468 $path
451 } 469 }
452 470
453 sub busycb { 471 sub busycb {
454 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 472 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
455 $_[0]=straction($action); 473 $_[0]=straction($action);
456 print "busy_callback(", (join ",",@_), ")\n"; 474 print "busy_callback(", (join ",",@_), ")\n";
457 0; 475 0
458 } 476 }
459 477
478 SetOption OPT_RBUF, 128*1024;
479 SetOption OPT_WBUF, 1024*1024;
460 SetOption OPT_IGNMODE, 1; 480 SetOption OPT_IGNMODE, 1;
481 SetOption OPT_IGNMODE, 1;
461 SetOption OPT_VERBOSE, 1; 482 SetOption OPT_VERBOSE, 1;
462 483
463 # show the three ways you can set callback functions. I normally 484 # show the three ways you can set callback functions. I normally
464 # prefer the one with the sub inplace. 485 # prefer the one with the sub inplace.
465 SetFNameFilter \&namefilter; 486 SetFNameFilter \&namefilter;
466 487
467 SetBusyCallback "busycb", 333; 488 SetBusyCallback "busycb", 333;
468 489
469 SetMsgCallback sub { 490 SetMsgCallback sub {
470 my ($msg, $level) = @_; 491 my ($msg, $level) = @_;
471 print uc strmsglevel $_[1], ": $msg\n"; 492 print uc strmsglevel $_[1], ": $msg\n";
472 }; 493 };
473 494
474 # the following non-trivial FileNameCallback takes care 495 # the following non-trivial FileNameCallback takes care
475 # of some subject lines not detected properly by uulib: 496 # of some subject lines not detected properly by uulib:
476 SetFileNameCallback sub { 497 SetFileNameCallback sub {
477 return unless $_[1]; # skip "Re:"-plies et al. 498 return unless $_[1]; # skip "Re:"-plies et al.
478 local $_ = $_[0]; 499 local $_ = $_[0];
479 500
480 # the following rules are rather effective on some newsgroups, 501 # the following rules are rather effective on some newsgroups,
481 # like alt.binaries.games.anime, where non-mime, uuencoded data 502 # like alt.binaries.games.anime, where non-mime, uuencoded data
482 # is very common 503 # is very common
483 504
484 # if we find some *.rar, take it as the filename 505 # if we find some *.rar, take it as the filename
485 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 506 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
486 507
487 # one common subject format 508 # one common subject format
488 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 509 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
489 510
490 # - filename.par (04/55) 511 # - filename.par (04/55)
491 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 512 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
492 513
493 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 514 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
494 # - (20 files) No.17 Roseanne.jpg [2/2] 515 # - (20 files) No.17 Roseanne.jpg [2/2]
495 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 516 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
496 517
518 # try to detect some common forms of filenames
519 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
520
497 # otherwise just pass what we have 521 # otherwise just pass what we have
498 return (); 522 ()
499 }; 523 };
500 524
501 # now read all files in the directory uusrc/* 525 # now read all files in the directory uusrc/*
502 for(<uusrc/*>) { 526 for (<uusrc/*>) {
503 my($retval,$count)=LoadFile ($_, $_, 1); 527 my ($retval, $count) = LoadFile ($_, $_, 1);
504 print "file($_), status(", strerror $retval, ") parts($count)\n"; 528 print "file($_), status(", strerror $retval, ") parts($count)\n";
505 } 529 }
506 530
507 SetOption OPT_SAVEPATH, "uudst/"; 531 SetOption OPT_SAVEPATH, "uudst/";
508 532
509 # now wade through all files and their source parts 533 # now wade through all files and their source parts
510 $i = 0; 534 for my $uu (GetFileList) {
511 while ($uu = GetFileListItem($i)) { 535 print "file ", $uu->filename, "\n";
512 $i++;
513 print "file nr. $i";
514 print " state ", $uu->state; 536 print " state ", $uu->state, "\n";
515 print " mode ", $uu->mode; 537 print " mode ", $uu->mode, "\n";
516 print " uudet ", strencoding $uu->uudet; 538 print " uudet ", strencoding $uu->uudet, "\n";
517 print " size ", $uu->size; 539 print " size ", $uu->size, "\n";
518 print " filename ", $uu->filename;
519 print " subfname ", $uu->subfname; 540 print " subfname ", $uu->subfname, "\n";
520 print " mimeid ", $uu->mimeid; 541 print " mimeid ", $uu->mimeid, "\n";
521 print " mimetype ", $uu->mimetype; 542 print " mimetype ", $uu->mimetype, "\n";
522 print "\n";
523 543
524 # print additional info about all parts 544 # print additional info about all parts
545 print " parts";
525 for ($uu->parts) { 546 for ($uu->parts) {
526 while (my ($k, $v) = each %$_) { 547 for my $k (sort keys %$_) {
527 print "$k > $v, "; 548 print " $k=$_->{$k}";
528 } 549 }
529 print "\n"; 550 print "\n";
530 } 551 }
531 552
532 $uu->decode_temp;
533 print " temporarily decoded to ", $uu->binfile, "\n";
534 $uu->remove_temp; 553 $uu->remove_temp;
535 554
536 print strerror $uu->decode; 555 if (my $err = $uu->decode) {
556 print " ERROR ", strerror $err, "\n";
557 } else {
537 print " saved as uudst/", $uu->filename, "\n"; 558 print " successfully saved as uudst/", $uu->filename, "\n";
538 } 559 }
560 }
539 561
540 print "cleanup...\n"; 562 print "cleanup...\n";
541 563
542 CleanUp(); 564 CleanUp;
565
566=head1 PERLMULTICORE SUPPORT
567
568This module supports the perlmulticore standard (see
569L<http://perlmulticore.schmorp.de/> for more info) for the following
570functions - generally these are functions accessing the disk and/or using
571considerable CPU time:
572
573 LoadFile
574 $item->decode
575 $item->decode_temp
576 $item->remove_temp
577 $item->info
578
579The perl interpreter will be reacquired/released on every callback
580invocation, so for performance reasons, callbacks should be avoided if
581that is costly.
582
583Future versions might enable multicore support for more functions.
584
585=head1 BUGS AND LIMITATIONS
586
587The original uulib library this module uses was written at a time when
588main memory was measured in megabytes and buffer overflows as a security
589thing didn't exist. While a lot of security fixes have been applied over
590the years (including some defense-in-depth mechanisms that can shield
591against a lot of as-yet undetected bugs), using this library for
592security purposes requires care.
593
594Likewise, file sizes when the uulib library was written were tiny compared
595to today, so do not expect this library to handle files larger than 2GB.
543 596
544=head1 AUTHOR 597=head1 AUTHOR
545 598
546Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written 599Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
547by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 600by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
548bugfixed by Marc Lehmann. 601bugfixed by Marc Lehmann.
549 602
550=head1 SEE ALSO 603=head1 SEE ALSO
551 604
552perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 605perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
553 606
554=cut 607=cut
608

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines