ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.12 by root, Sun Oct 13 13:51:00 2002 UTC vs.
Revision 1.39 by root, Wed Oct 28 08:05:04 2009 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3no warnings;
4use strict;
2 5
3use Carp; 6use Carp;
4 7
5require Exporter; 8require Exporter;
6require DynaLoader; 9require DynaLoader;
7use AutoLoader;
8 10
9$VERSION = 0.3; 11our $VERSION = '1.33';
10 12
11@ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
12 14
13@_consts = qw( 15our @_consts = qw(
14 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
15 17
16 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 18 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
17 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 19 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
18 20
19 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 21 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
20 22
23 OPT_RBUF OPT_WBUF
21 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
25 28
26 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
27 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
28 31
29 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 XX_ENCODED UU_ENCODED YENC_ENCODED 33 XX_ENCODED UU_ENCODED YENC_ENCODED
31); 34);
32 35
33@_funcs = qw( 36our @_funcs = qw(
34 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
35 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
36 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp
37 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
38 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
39 E_PrepPartial 42 E_PrepPartial
40 43
41 straction strencoding strmsglevel 44 straction strencoding strmsglevel
42); 45);
43 46
44@EXPORT = @_consts; 47our @EXPORT = @_consts;
45@EXPORT_OK = @_funcs; 48our @EXPORT_OK = @_funcs;
46%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
47 50
48bootstrap Convert::UUlib $VERSION; 51bootstrap Convert::UUlib $VERSION;
49 52
50Initialize(); 53Initialize();
51 54
52# not when < 5.005_6x 55# not when < 5.005_6x
53# END { CleanUp() } 56# END { CleanUp() }
54 57
55for (@_consts) { 58for (@_consts) {
56 my $constant = constant($_); 59 my $constant = constant($_);
60 no strict 'refs';
57 *$_ = sub () { $constant }; 61 *$_ = sub () { $constant };
58} 62}
59 63
60# action code -> string mapping 64# action code -> string mapping
61sub straction($) { 65sub straction($) {
99=head1 SYNOPSIS 103=head1 SYNOPSIS
100 104
101 use Convert::UUlib ':all'; 105 use Convert::UUlib ':all';
102 106
103 # read all the files named on the commandline and decode them 107 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example.
104 LoadFile($_) for @ARGV; 109 LoadFile $_ for @ARGV;
105 for($i=0; $uu=GetFileListItem($i); $i++) { 110 for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
106 $uu->decode if $uu->state & FILE_OK; 111 if ($uu->state & FILE_OK) {
112 $uu->decode;
113 print $uu->filename, "\n";
114 }
107 } 115 }
108 116
109=head1 DESCRIPTION 117=head1 DESCRIPTION
110 118
111Read the file doc/library.pdf from the distribution for in-depth 119Read the file doc/library.pdf from the distribution for in-depth
148 OPT_PROGRESS retrieve progress information 156 OPT_PROGRESS retrieve progress information
149 OPT_USETEXT handle text messages 157 OPT_USETEXT handle text messages
150 OPT_PREAMB handle Mime preambles/epilogues 158 OPT_PREAMB handle Mime preambles/epilogues
151 OPT_TINYB64 detect short B64 outside of Mime 159 OPT_TINYB64 detect short B64 outside of Mime
152 OPT_ENCEXT extension for single-part encoded files 160 OPT_ENCEXT extension for single-part encoded files
153 OPT_REMOVE remove input files after decoding 161 OPT_REMOVE remove input files after decoding (dangerous)
154 OPT_MOREMIME strict MIME adherence 162 OPT_MOREMIME strict MIME adherence
155 OPT_DOTDOT .. unescaping has not yet been done on input files 163 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes
166 OPT_AUTOCHECK automatically check file list after every loadfile
156 167
157=head2 Result/Error codes 168=head2 Result/Error codes
158 169
159 RET_OK everything went fine 170 RET_OK everything went fine
160 RET_IOERR I/O Error - examine errno 171 RET_IOERR I/O Error - examine errno
171 182
172 This code is zero, i.e. "false": 183 This code is zero, i.e. "false":
173 184
174 UUFILE_READ Read in, but not further processed 185 UUFILE_READ Read in, but not further processed
175 186
176 The following state codes are ored together: 187 The following state codes are or'ed together:
177 188
178 FILE_MISPART Missing Part(s) detected 189 FILE_MISPART Missing Part(s) detected
179 FILE_NOBEGIN No 'begin' found 190 FILE_NOBEGIN No 'begin' found
180 FILE_NOEND No 'end' found 191 FILE_NOEND No 'end' found
181 FILE_NODATA File does not contain valid uudata 192 FILE_NODATA File does not contain valid uudata
197=head1 EXPORTED FUNCTIONS 208=head1 EXPORTED FUNCTIONS
198 209
199=head2 Initializing and cleanup 210=head2 Initializing and cleanup
200 211
201Initialize is automatically called when the module is loaded and allocates 212Initialize is automatically called when the module is loaded and allocates
202quite a bit of memory. CleanUp releases that again. 213quite a small amount of memory for todays machines ;) CleanUp releases that
214again.
203 215
204 Initialize; # not normally necessary 216On my machine, a fairly complete decode with DBI backend needs about 10MB
217RSS to decode 20000 files.
218
219=over 4
220
221=item Initialize
222
223Not normally necessary, (re-)initializes the library.
224
225=item CleanUp
226
205 CleanUp; # could be called at the end to release memory 227Not normally necessary, could be called at the end to release memory
228before starting a new decoding round.
229
230=back
206 231
207=head2 Setting and querying options 232=head2 Setting and querying options
208 233
234=over 4
235
209 $option = GetOption OPT_xxx; 236=item $option = GetOption OPT_xxx
237
210 SetOption OPT_xxx, opt-value; 238=item SetOption OPT_xxx, opt-value
239
240=back
241
242See the C<OPT_xxx> constants above to see which options exist.
211 243
212=head2 Setting various callbacks 244=head2 Setting various callbacks
213 245
246=over 4
247
214 SetMsgCallback [callback-function]; 248=item SetMsgCallback [callback-function]
249
215 SetBusyCallback [callback-function]; 250=item SetBusyCallback [callback-function]
251
216 SetFileCallback [callback-function]; 252=item SetFileCallback [callback-function]
253
217 SetFNameFilter [callback-function]; 254=item SetFNameFilter [callback-function]
255
256=back
218 257
219=head2 Call the currently selected FNameFilter 258=head2 Call the currently selected FNameFilter
220 259
260=over 4
261
221 $file = FNameFilter $file; 262=item $file = FNameFilter $file
263
264=back
222 265
223=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 266=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
224 267
268=over 4
269
225 ($retval, $count) = LoadFile $fname, [$id, [$delflag]]; 270=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
271
272Load the given file and scan it for encoded contents. Optionally tag it
273with the given id, and if C<$delflag> is true, delete the file after it
274is no longer necessary. If you are certain of the part number, you can
275specify it as the last argument.
276
277A better (usually faster) way of doing this is using the C<SetFNameFilter>
278functionality.
279
226 $retval = Smerge $pass; 280=item $retval = Smerge $pass
281
282If you are desperate, try to call C<Smerge> with increasing C<$pass>
283values, beginning at C<0>, to try to merge parts that usually would not
284have been merged.
285
286Most probably this will result in garbled files, so never do this by
287default, except:
288
289If the C<OPT_AUTOCHECK> option has been disabled (by default it is
290enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
291after loading all files as an additional pre-pass (which is normally done
292by C<LoadFile>).
293
227 $item = GetFileListItem $item_number; 294=item $item = GetFileListItem $item_number
228 295
229=head2 The procedural interface is undocumented, use the following methods instead 296Return the C<$item> structure for the C<$item_number>'th found file, or
297C<undef> of no file with that number exists.
230 298
299The first file has number C<0>, and the series has no holes, so you can
300iterate over all files by starting with zero and incrementing until you
301hit C<undef>.
302
303=back
304
305=head2 Decoding files
306
307=over 4
308
231 $retval = $item->rename($newname); 309=item $retval = $item->rename($newname)
310
311Change the ondisk filename where the decoded file will be saved.
312
232 $retval = $item->decode_temp; 313=item $retval = $item->decode_temp
314
315Decode the file into a temporary location, use C<< $item->infile >> to
316retrieve the temporary filename.
317
233 $retval = $item->remove_temp; 318=item $retval = $item->remove_temp
319
320Remove the temporarily decoded file again.
321
234 $retval = $item->decode([$target_path]); 322=item $retval = $item->decode([$target_path])
323
324Decode the file to it's destination, or the given target path.
325
235 $retval = $item->info(callback-function); 326=item $retval = $item->info(callback-function)
327
328=back
236 329
237=head2 Querying (and setting) item attributes 330=head2 Querying (and setting) item attributes
238 331
332=over 4
333
239 $state = $item->state; 334=item $state = $item->state
335
240 $mode = $item->mode([newmode]); 336=item $mode = $item->mode([newmode])
337
241 $uudet = $item->uudet; 338=item $uudet = $item->uudet
339
242 $size = $item->size; 340=item $size = $item->size
341
243 $filename = $item->filename([newfilename}); 342=item $filename = $item->filename([newfilename})
343
244 $subfname = $item->subfname; 344=item $subfname = $item->subfname
345
245 $mimeid = $item->mimeid; 346=item $mimeid = $item->mimeid
347
246 $mimetype = $item->mimetype; 348=item $mimetype = $item->mimetype
349
247 $binfile = $item->binfile; 350=item $binfile = $item->binfile
248 351
249=head2 Totally undocumented but well tested ;) 352=back
250 353
354=head2 Information about source parts
355
356=over 4
357
251 $parts = $item->parts; 358=item $parts = $item->parts
359
360Return information about all parts (source files) used to decode the file
361as a list of hashrefs with the following structure:
362
363 {
364 partno => <integer describing the part number, starting with 1>,
365 # the following member sonly exist when they contain useful information
366 sfname => <local pathname of the file where this part is from>,
367 filename => <the ondisk filename of the decoded file>,
368 subfname => <used to cluster postings, possibly the posting filename>,
369 subject => <the subject of the posting/mail>,
370 origin => <the possible source (From) address>,
371 mimetype => <the possible mimetype of the decoded file>,
372 mimeid => <the id part of the Content-Type>,
373 }
374
375Usually you are interested mostly the C<sfname> and possibly the C<partno>
376and C<filename> members.
377
378=back
252 379
253=head2 Functions below not documented and not very well tested 380=head2 Functions below not documented and not very well tested
254 381
255 QuickDecode 382 QuickDecode
256 EncodeMulti 383 EncodeMulti
316=head1 LARGE EXAMPLE DECODER 443=head1 LARGE EXAMPLE DECODER
317 444
318This is the file C<example-decoder> from the distribution, put here 445This is the file C<example-decoder> from the distribution, put here
319instead of more thorough documentation. 446instead of more thorough documentation.
320 447
448 #!/usr/bin/perl
449
321 # decode all the files in the directory uusrc/ and copy 450 # decode all the files in the directory uusrc/ and copy
322 # the resulting files to uudst/ 451 # the resulting files to uudst/
323 452
324 use Convert::UUlib ':all'; 453 use Convert::UUlib ':all';
325 454
326 sub namefilter { 455 sub namefilter {
327 my($path)=@_; 456 my ($path) = @_;
457
328 $path=~s/^.*[\/\\]//; 458 $path=~s/^.*[\/\\]//;
459
329 $path; 460 $path
330 } 461 }
331 462
332 sub busycb { 463 sub busycb {
333 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 464 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
334 $_[0]=straction($action); 465 $_[0]=straction($action);
335 print "busy_callback(", (join ",",@_), ")\n"; 466 print "busy_callback(", (join ",",@_), ")\n";
336 0; 467 0
337 } 468 }
338 469
470 SetOption OPT_RBUF, 128*1024;
471 SetOption OPT_WBUF, 1024*1024;
339 SetOption OPT_IGNMODE, 1; 472 SetOption OPT_IGNMODE, 1;
473 SetOption OPT_IGNMODE, 1;
340 SetOption OPT_VERBOSE, 1; 474 SetOption OPT_VERBOSE, 1;
341 475
342 # show the three ways you can set callback functions. I normally 476 # show the three ways you can set callback functions. I normally
343 # prefer the one with the sub inplace. 477 # prefer the one with the sub inplace.
344 SetFNameFilter \&namefilter; 478 SetFNameFilter \&namefilter;
345 479
346 SetBusyCallback "busycb", 333; 480 SetBusyCallback "busycb", 333;
347 481
348 SetMsgCallback sub { 482 SetMsgCallback sub {
349 my ($msg, $level) = @_; 483 my ($msg, $level) = @_;
350 print uc strmsglevel $_[1], ": $msg\n"; 484 print uc strmsglevel $_[1], ": $msg\n";
351 }; 485 };
352 486
353 # the following non-trivial FileNameCallback takes care 487 # the following non-trivial FileNameCallback takes care
354 # of some subject lines not detected properly by uulib: 488 # of some subject lines not detected properly by uulib:
355 SetFileNameCallback sub { 489 SetFileNameCallback sub {
356 return unless $_[1]; # skip "Re:"-plies et al. 490 return unless $_[1]; # skip "Re:"-plies et al.
357 local $_ = $_[0]; 491 local $_ = $_[0];
358 492
359 # the following rules are rather effective on some newsgroups, 493 # the following rules are rather effective on some newsgroups,
360 # like alt.binaries.games.anime, where non-mime, uuencoded data 494 # like alt.binaries.games.anime, where non-mime, uuencoded data
361 # is very common 495 # is very common
362 496
363 # if we find some *.rar, take it as the filename 497 # if we find some *.rar, take it as the filename
364 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 498 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
365 499
366 # one common subject format 500 # one common subject format
367 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 501 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
368 502
369 # - filename.par (04/55) 503 # - filename.par (04/55)
370 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 504 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
371 505
372 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 506 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
373 # - (20 files) No.17 Roseanne.jpg [2/2] 507 # - (20 files) No.17 Roseanne.jpg [2/2]
374 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 508 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
375 509
510 # try to detect some common forms of filenames
511 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
512
376 # otherwise just pass what we have 513 # otherwise just pass what we have
377 return (); 514 ()
378 }; 515 };
379 516
380 # now read all files in the directory uusrc/* 517 # now read all files in the directory uusrc/*
381 for(<uusrc/*>) { 518 for(<uusrc/*>) {
382 my($retval,$count)=LoadFile ($_, $_, 1); 519 my ($retval, $count) = LoadFile ($_, $_, 1);
383 print "file($_), status(", strerror $retval, ") parts($count)\n"; 520 print "file($_), status(", strerror $retval, ") parts($count)\n";
384 } 521 }
385 522
386 SetOption OPT_SAVEPATH, "uudst/"; 523 SetOption OPT_SAVEPATH, "uudst/";
387 524
388 # now wade through all files and their source parts 525 # now wade through all files and their source parts
389 $i = 0; 526 $i = 0;
390 while ($uu = GetFileListItem($i)) { 527 while ($uu = GetFileListItem $i) {
391 $i++; 528 $i++;
392 print "file nr. $i"; 529 print "file nr. $i";
393 print " state ", $uu->state; 530 print " state ", $uu->state;
394 print " mode ", $uu->mode; 531 print " mode ", $uu->mode;
395 print " uudet ", strencoding $uu->uudet; 532 print " uudet ", strencoding $uu->uudet;
396 print " size ", $uu->size; 533 print " size ", $uu->size;
397 print " filename ", $uu->filename; 534 print " filename ", $uu->filename;
398 print " subfname ", $uu->subfname; 535 print " subfname ", $uu->subfname;
399 print " mimeid ", $uu->mimeid; 536 print " mimeid ", $uu->mimeid;
400 print " mimetype ", $uu->mimetype; 537 print " mimetype ", $uu->mimetype;
401 print "\n"; 538 print "\n";
402 539
403 # print additional info about all parts 540 # print additional info about all parts
404 for ($uu->parts) { 541 for ($uu->parts) {
405 while (my ($k, $v) = each %$_) { 542 while (my ($k, $v) = each %$_) {
406 print "$k > $v, "; 543 print "$k > $v, ";
407 } 544 }
408 print "\n"; 545 print "\n";
409 } 546 }
410 547
411 $uu->decode_temp; 548 print $uu->filename;
412 print " temporarily decoded to ", $uu->binfile, "\n"; 549
413 $uu->remove_temp; 550 $uu->remove_temp;
414 551
415 print strerror $uu->decode; 552 if (my $err = $uu->decode ()) {
553 print ", ", strerror $err, "\n";
554 } else {
416 print " saved as uudst/", $uu->filename, "\n"; 555 print ", saved as uudst/", $uu->filename, "\n";
417 } 556 }
557 }
418 558
419 print "cleanup...\n"; 559 print "cleanup...\n";
420 560
421 CleanUp(); 561 CleanUp;
422 562
423=head1 AUTHOR 563=head1 AUTHOR
424 564
425Marc Lehmann <pcg@goof.com>, the original uulib library was written 565Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
426by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 566by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
427bugfixed by Marc Lehmann. 567bugfixed by Marc Lehmann.
428 568
429=head1 SEE ALSO 569=head1 SEE ALSO
430 570

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines