ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.12 by root, Sun Oct 13 13:51:00 2002 UTC vs.
Revision 1.47 by root, Fri Feb 28 16:57:25 2020 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3no warnings;
4use strict;
2 5
3use Carp; 6use Carp;
4 7
5require Exporter; 8require Exporter;
6require DynaLoader; 9require DynaLoader;
7use AutoLoader;
8 10
9$VERSION = 0.3; 11our $VERSION = 1.62;
10 12
11@ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
12 14
13@_consts = qw( 15our @_consts = qw(
14 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
15 17
16 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 18 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
17 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 19 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
18 20
19 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 21 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
20 22
23 OPT_RBUF OPT_WBUF
21 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
25 28
26 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
27 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
28 31
29 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 XX_ENCODED UU_ENCODED YENC_ENCODED 33 XX_ENCODED UU_ENCODED YENC_ENCODED
31); 34);
32 35
33@_funcs = qw( 36our @_funcs = qw(
34 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
35 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
36 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp
37 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
38 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
39 E_PrepPartial 42 E_PrepPartial
40 43
41 straction strencoding strmsglevel 44 straction strencoding strmsglevel
42); 45);
43 46
44@EXPORT = @_consts; 47our @EXPORT = @_consts;
45@EXPORT_OK = @_funcs; 48our @EXPORT_OK = @_funcs;
46%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
47 50
48bootstrap Convert::UUlib $VERSION; 51bootstrap Convert::UUlib $VERSION;
49 52
50Initialize(); 53Initialize();
51 54
52# not when < 5.005_6x 55# not when < 5.005_6x
53# END { CleanUp() } 56# END { CleanUp() }
54 57
55for (@_consts) { 58for (@_consts) {
56 my $constant = constant($_); 59 my $constant = constant($_);
60 no strict 'refs';
57 *$_ = sub () { $constant }; 61 *$_ = sub () { $constant };
58} 62}
59 63
60# action code -> string mapping 64# action code -> string mapping
61sub straction($) { 65sub straction($) {
99=head1 SYNOPSIS 103=head1 SYNOPSIS
100 104
101 use Convert::UUlib ':all'; 105 use Convert::UUlib ':all';
102 106
103 # read all the files named on the commandline and decode them 107 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example.
104 LoadFile($_) for @ARGV; 109 LoadFile $_ for @ARGV;
105 for($i=0; $uu=GetFileListItem($i); $i++) { 110 for my $uu (GetFileList) {
106 $uu->decode if $uu->state & FILE_OK; 111 if ($uu->state & FILE_OK) {
112 $uu->decode;
113 print $uu->filename, "\n";
114 }
107 } 115 }
108 116
109=head1 DESCRIPTION 117=head1 DESCRIPTION
110 118
111Read the file doc/library.pdf from the distribution for in-depth 119Read the file doc/library.pdf from the distribution for in-depth
148 OPT_PROGRESS retrieve progress information 156 OPT_PROGRESS retrieve progress information
149 OPT_USETEXT handle text messages 157 OPT_USETEXT handle text messages
150 OPT_PREAMB handle Mime preambles/epilogues 158 OPT_PREAMB handle Mime preambles/epilogues
151 OPT_TINYB64 detect short B64 outside of Mime 159 OPT_TINYB64 detect short B64 outside of Mime
152 OPT_ENCEXT extension for single-part encoded files 160 OPT_ENCEXT extension for single-part encoded files
153 OPT_REMOVE remove input files after decoding 161 OPT_REMOVE remove input files after decoding (dangerous)
154 OPT_MOREMIME strict MIME adherence 162 OPT_MOREMIME strict MIME adherence
155 OPT_DOTDOT .. unescaping has not yet been done on input files 163 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes
166 OPT_AUTOCHECK automatically check file list after every loadfile
156 167
157=head2 Result/Error codes 168=head2 Result/Error codes
158 169
159 RET_OK everything went fine 170 RET_OK everything went fine
160 RET_IOERR I/O Error - examine errno 171 RET_IOERR I/O Error - examine errno
171 182
172 This code is zero, i.e. "false": 183 This code is zero, i.e. "false":
173 184
174 FILE_READ Read in, but not further processed 185 FILE_READ Read in, but not further processed
175 186
176 The following state codes are ored together: 187 The following state codes are or'ed together:
177 188
178 FILE_MISPART Missing Part(s) detected 189 FILE_MISPART Missing Part(s) detected
179 FILE_NOBEGIN No 'begin' found 190 FILE_NOBEGIN No 'begin' found
180 FILE_NOEND No 'end' found 191 FILE_NOEND No 'end' found
181 FILE_NODATA File does not contain valid uudata 192 FILE_NODATA File does not contain valid uudata
197=head1 EXPORTED FUNCTIONS 208=head1 EXPORTED FUNCTIONS
198 209
199=head2 Initializing and cleanup 210=head2 Initializing and cleanup
200 211
201Initialize is automatically called when the module is loaded and allocates 212Initialize is automatically called when the module is loaded and allocates
202quite a bit of memory. CleanUp releases that again. 213quite a small amount of memory for today's machines ;) CleanUp releases that
214again.
203 215
204 Initialize; # not normally necessary 216On my machine, a fairly complete decode with DBI backend needs about 10MB
217RSS to decode 20000 files.
218
219=over
220
221=item Initialize
222
223Not normally necessary, (re-)initializes the library.
224
225=item CleanUp
226
205 CleanUp; # could be called at the end to release memory 227Not normally necessary, could be called at the end to release memory
228before starting a new decoding round.
229
230=back
206 231
207=head2 Setting and querying options 232=head2 Setting and querying options
208 233
234=over
235
209 $option = GetOption OPT_xxx; 236=item $option = GetOption OPT_xxx
237
210 SetOption OPT_xxx, opt-value; 238=item SetOption OPT_xxx, opt-value
239
240=back
241
242See the C<OPT_xxx> constants above to see which options exist.
211 243
212=head2 Setting various callbacks 244=head2 Setting various callbacks
213 245
246=over
247
214 SetMsgCallback [callback-function]; 248=item SetMsgCallback [callback-function]
249
215 SetBusyCallback [callback-function]; 250=item SetBusyCallback [callback-function]
251
216 SetFileCallback [callback-function]; 252=item SetFileCallback [callback-function]
253
217 SetFNameFilter [callback-function]; 254=item SetFNameFilter [callback-function]
255
256=back
218 257
219=head2 Call the currently selected FNameFilter 258=head2 Call the currently selected FNameFilter
220 259
260=over
261
221 $file = FNameFilter $file; 262=item $file = FNameFilter $file
263
264=back
222 265
223=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 266=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
224 267
268=over
269
225 ($retval, $count) = LoadFile $fname, [$id, [$delflag]]; 270=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
271
272Load the given file and scan it for encoded contents. Optionally tag it
273with the given id, and if C<$delflag> is true, delete the file after it
274is no longer necessary. If you are certain of the part number, you can
275specify it as the last argument.
276
277A better (usually faster) way of doing this is using the C<SetFNameFilter>
278functionality.
279
226 $retval = Smerge $pass; 280=item $retval = Smerge $pass
281
282If you are desperate, try to call C<Smerge> with increasing C<$pass>
283values, beginning at C<0>, to try to merge parts that usually would not
284have been merged.
285
286Most probably this will result in garbled files, so never do this by
287default, except:
288
289If the C<OPT_AUTOCHECK> option has been disabled (by default it is
290enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
291after loading all files as an additional pre-pass (which is normally done
292by C<LoadFile>).
293
227 $item = GetFileListItem $item_number; 294=item $item = GetFileListItem $item_number
228 295
229=head2 The procedural interface is undocumented, use the following methods instead 296Return the C<$item> structure for the C<$item_number>'th found file, or
297C<undef> of no file with that number exists.
230 298
299The first file has number C<0>, and the series has no holes, so you can
300iterate over all files by starting with zero and incrementing until you
301hit C<undef>.
302
303This function has to walk the linear list of files on each access, so
304if you want to iterate over all items, it is usually faster to use
305C<GetFileList>.
306
307=item @items = GetFileList
308
309Similar to C<GetFileListItem>, but returns all files in one go.
310
311=back
312
313=head2 Decoding files
314
315=over
316
231 $retval = $item->rename($newname); 317=item $retval = $item->rename ($newname)
318
319Change the ondisk filename where the decoded file will be saved.
320
232 $retval = $item->decode_temp; 321=item $retval = $item->decode_temp
322
323Decode the file into a temporary location, use C<< $item->infile >> to
324retrieve the temporary filename.
325
233 $retval = $item->remove_temp; 326=item $retval = $item->remove_temp
327
328Remove the temporarily decoded file again.
329
234 $retval = $item->decode([$target_path]); 330=item $retval = $item->decode ([$target_path])
331
332Decode the file to its destination, or the given target path.
333
235 $retval = $item->info(callback-function); 334=item $retval = $item->info (callback-function)
335
336=back
236 337
237=head2 Querying (and setting) item attributes 338=head2 Querying (and setting) item attributes
238 339
340=over
341
239 $state = $item->state; 342=item $state = $item->state
343
240 $mode = $item->mode([newmode]); 344=item $mode = $item->mode ([newmode])
345
241 $uudet = $item->uudet; 346=item $uudet = $item->uudet
347
242 $size = $item->size; 348=item $size = $item->size
349
243 $filename = $item->filename([newfilename]); 350=item $filename = $item->filename ([newfilename])
351
244 $subfname = $item->subfname; 352=item $subfname = $item->subfname
353
245 $mimeid = $item->mimeid; 354=item $mimeid = $item->mimeid
355
246 $mimetype = $item->mimetype; 356=item $mimetype = $item->mimetype
357
247 $binfile = $item->binfile; 358=item $binfile = $item->binfile
248 359
249=head2 Totally undocumented but well tested ;) 360=back
250 361
362=head2 Information about source parts
363
364=over
365
251 $parts = $item->parts; 366=item $parts = $item->parts
252 367
368Return information about all parts (source files) used to decode the file
369as a list of hashrefs with the following structure:
370
371 {
372 partno => <integer describing the part number, starting with 1>,
373 # the following members only exist when they contain useful information
374 sfname => <local pathname of the file where this part is from>,
375 filename => <the ondisk filename of the decoded file>,
376 subfname => <used to cluster postings, possibly the posting filename>,
377 subject => <the subject of the posting/mail>,
378 origin => <the possible source (From) address>,
379 mimetype => <the possible mimetype of the decoded file>,
380 mimeid => <the id part of the Content-Type>,
381 }
382
383Usually you are interested mostly in the C<sfname> and possibly the C<partno>
384and C<filename> members.
385
386=back
387
253=head2 Functions below not documented and not very well tested 388=head2 Functions below are not documented and not very well tested - feedback welcome
254 389
255 QuickDecode 390 QuickDecode
256 EncodeMulti 391 EncodeMulti
257 EncodePartial 392 EncodePartial
258 EncodeToStream 393 EncodeToStream
262 397
263=head2 EXTENSION FUNCTIONS 398=head2 EXTENSION FUNCTIONS
264 399
265Functions found in this module but not documented in the uulib documentation: 400Functions found in this module but not documented in the uulib documentation:
266 401
267=over 4 402=over
268 403
269=item $msg = straction ACT_xxx 404=item $msg = straction ACT_xxx
270 405
271Return a human readable string representing the given action code. 406Return a human readable string representing the given action code.
272 407
316=head1 LARGE EXAMPLE DECODER 451=head1 LARGE EXAMPLE DECODER
317 452
318This is the file C<example-decoder> from the distribution, put here 453This is the file C<example-decoder> from the distribution, put here
319instead of more thorough documentation. 454instead of more thorough documentation.
320 455
456 #!/usr/bin/perl
457
321 # decode all the files in the directory uusrc/ and copy 458 # decode all the files in the directory uusrc/ and copy
322 # the resulting files to uudst/ 459 # the resulting files to uudst/
323 460
324 use Convert::UUlib ':all'; 461 use Convert::UUlib ':all';
325 462
326 sub namefilter { 463 sub namefilter {
327 my($path)=@_; 464 my ($path) = @_;
465
328 $path=~s/^.*[\/\\]//; 466 $path=~s/^.*[\/\\]//;
467
329 $path; 468 $path
330 } 469 }
331 470
332 sub busycb { 471 sub busycb {
333 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 472 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
334 $_[0]=straction($action); 473 $_[0]=straction($action);
335 print "busy_callback(", (join ",",@_), ")\n"; 474 print "busy_callback(", (join ",",@_), ")\n";
336 0; 475 0
337 } 476 }
338 477
478 SetOption OPT_RBUF, 128*1024;
479 SetOption OPT_WBUF, 1024*1024;
339 SetOption OPT_IGNMODE, 1; 480 SetOption OPT_IGNMODE, 1;
481 SetOption OPT_IGNMODE, 1;
340 SetOption OPT_VERBOSE, 1; 482 SetOption OPT_VERBOSE, 1;
341 483
342 # show the three ways you can set callback functions. I normally 484 # show the three ways you can set callback functions. I normally
343 # prefer the one with the sub inplace. 485 # prefer the one with the sub inplace.
344 SetFNameFilter \&namefilter; 486 SetFNameFilter \&namefilter;
345 487
346 SetBusyCallback "busycb", 333; 488 SetBusyCallback "busycb", 333;
347 489
348 SetMsgCallback sub { 490 SetMsgCallback sub {
349 my ($msg, $level) = @_; 491 my ($msg, $level) = @_;
350 print uc strmsglevel $_[1], ": $msg\n"; 492 print uc strmsglevel $_[1], ": $msg\n";
351 }; 493 };
352 494
353 # the following non-trivial FileNameCallback takes care 495 # the following non-trivial FileNameCallback takes care
354 # of some subject lines not detected properly by uulib: 496 # of some subject lines not detected properly by uulib:
355 SetFileNameCallback sub { 497 SetFileNameCallback sub {
356 return unless $_[1]; # skip "Re:"-plies et al. 498 return unless $_[1]; # skip "Re:"-plies et al.
357 local $_ = $_[0]; 499 local $_ = $_[0];
358 500
359 # the following rules are rather effective on some newsgroups, 501 # the following rules are rather effective on some newsgroups,
360 # like alt.binaries.games.anime, where non-mime, uuencoded data 502 # like alt.binaries.games.anime, where non-mime, uuencoded data
361 # is very common 503 # is very common
362 504
363 # if we find some *.rar, take it as the filename 505 # if we find some *.rar, take it as the filename
364 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 506 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
365 507
366 # one common subject format 508 # one common subject format
367 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 509 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
368 510
369 # - filename.par (04/55) 511 # - filename.par (04/55)
370 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 512 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
371 513
372 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 514 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
373 # - (20 files) No.17 Roseanne.jpg [2/2] 515 # - (20 files) No.17 Roseanne.jpg [2/2]
374 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 516 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
375 517
518 # try to detect some common forms of filenames
519 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
520
376 # otherwise just pass what we have 521 # otherwise just pass what we have
377 return (); 522 ()
378 }; 523 };
379 524
380 # now read all files in the directory uusrc/* 525 # now read all files in the directory uusrc/*
381 for(<uusrc/*>) { 526 for (<uusrc/*>) {
382 my($retval,$count)=LoadFile ($_, $_, 1); 527 my ($retval, $count) = LoadFile ($_, $_, 1);
383 print "file($_), status(", strerror $retval, ") parts($count)\n"; 528 print "file($_), status(", strerror $retval, ") parts($count)\n";
384 } 529 }
385 530
386 SetOption OPT_SAVEPATH, "uudst/"; 531 SetOption OPT_SAVEPATH, "uudst/";
387 532
388 # now wade through all files and their source parts 533 # now wade through all files and their source parts
389 $i = 0; 534 for my $uu (GetFileList) {
390 while ($uu = GetFileListItem($i)) { 535 print "file ", $uu->filename, "\n";
391 $i++;
392 print "file nr. $i";
393 print " state ", $uu->state; 536 print " state ", $uu->state, "\n";
394 print " mode ", $uu->mode; 537 print " mode ", $uu->mode, "\n";
395 print " uudet ", strencoding $uu->uudet; 538 print " uudet ", strencoding $uu->uudet, "\n";
396 print " size ", $uu->size; 539 print " size ", $uu->size, "\n";
397 print " filename ", $uu->filename;
398 print " subfname ", $uu->subfname; 540 print " subfname ", $uu->subfname, "\n";
399 print " mimeid ", $uu->mimeid; 541 print " mimeid ", $uu->mimeid, "\n";
400 print " mimetype ", $uu->mimetype; 542 print " mimetype ", $uu->mimetype, "\n";
401 print "\n";
402 543
403 # print additional info about all parts 544 # print additional info about all parts
545 print " parts";
404 for ($uu->parts) { 546 for ($uu->parts) {
405 while (my ($k, $v) = each %$_) { 547 for my $k (sort keys %$_) {
406 print "$k > $v, "; 548 print " $k=$_->{$k}";
407 } 549 }
408 print "\n"; 550 print "\n";
409 } 551 }
410 552
411 $uu->decode_temp;
412 print " temporarily decoded to ", $uu->binfile, "\n";
413 $uu->remove_temp; 553 $uu->remove_temp;
414 554
415 print strerror $uu->decode; 555 if (my $err = $uu->decode) {
556 print " ERROR ", strerror $err, "\n";
557 } else {
416 print " saved as uudst/", $uu->filename, "\n"; 558 print " successfully saved as uudst/", $uu->filename, "\n";
417 } 559 }
560 }
418 561
419 print "cleanup...\n"; 562 print "cleanup...\n";
420 563
421 CleanUp(); 564 CleanUp;
565
566=head1 PERLMULTICORE SUPPORT
567
568This module supports the perlmulticore standard (see
569L<http://perlmulticore.schmorp.de/> for more info) for the following
570functions - generally these are functions accessing the disk and/or using
571considerable CPU time:
572
573 LoadFile
574 $item->decode
575 $item->decode_temp
576 $item->remove_temp
577 $item->info
578
579The perl interpreter will be reacquired/released on every callback
580invocation, so for performance reasons, callbacks should be avoided if
581that is costly.
582
583Future versions might enable multicore support for more functions.
584
585=head1 BUGS AND LIMITATIONS
586
587The original uulib library this module uses was written at a time when
588main memory was measured in megabytes and buffer overflows as a security
589thing didn't exist. While a lot of security fixes have been applied over
590the years (including some defense in depth mechanisms that can shield
591against a lot of as-of-yet undetected bugs), using this library for
592security purposes requires care.
593
594Likewise, file sizes when the uulib library was written were tiny compared
595to today, so do not expect this library to handle files larger than 2GB.
422 596
423=head1 AUTHOR 597=head1 AUTHOR
424 598
425Marc Lehmann <pcg@goof.com>, the original uulib library was written 599Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
426by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 600by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
427bugfixed by Marc Lehmann. 601bugfixed by Marc Lehmann.
428 602
429=head1 SEE ALSO 603=head1 SEE ALSO
430 604
431perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 605perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>.
432 606
433=cut 607=cut
608

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines