ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.11 by root, Sun Oct 13 13:47:09 2002 UTC vs.
Revision 1.33 by root, Mon Oct 13 12:13:43 2008 UTC

1package Convert::UUlib; 1package Convert::UUlib;
2
3no warnings;
4use strict;
2 5
3use Carp; 6use Carp;
4 7
5require Exporter; 8require Exporter;
6require DynaLoader; 9require DynaLoader;
7use AutoLoader;
8 10
9$VERSION = 0.214; 11our $VERSION = '1.12';
10 12
11@ISA = qw(Exporter DynaLoader); 13our @ISA = qw(Exporter DynaLoader);
12 14
13@_consts = qw( 15our @_consts = qw(
14 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
15 17
16 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA 18 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
17 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE 19 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
18 20
19 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 21 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
20 22
23 OPT_RBUF OPT_WBUF
21 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT 27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT
25 28
28 31
29 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 XX_ENCODED UU_ENCODED YENC_ENCODED 33 XX_ENCODED UU_ENCODED YENC_ENCODED
31); 34);
32 35
33@_funcs = qw( 36our @_funcs = qw(
34 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
35 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback 38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
36 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp 39 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp
37 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti 40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
38 EncodePartial EncodeToStream EncodeToFile E_PrepSingle 41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
39 E_PrepPartial 42 E_PrepPartial
40 43
41 straction strencoding strmsglevel 44 straction strencoding strmsglevel
42); 45);
43 46
44@EXPORT = @_consts; 47our @EXPORT = @_consts;
45@EXPORT_OK = @_funcs; 48our @EXPORT_OK = @_funcs;
46%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); 49our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
47 50
48bootstrap Convert::UUlib $VERSION; 51bootstrap Convert::UUlib $VERSION;
49 52
50Initialize(); 53Initialize();
51 54
52# not when < 5.005_6x 55# not when < 5.005_6x
53# END { CleanUp() } 56# END { CleanUp() }
54 57
55for (@_consts) { 58for (@_consts) {
56 my $constant = constant($_); 59 my $constant = constant($_);
60 no strict 'refs';
57 *$_ = sub () { $constant }; 61 *$_ = sub () { $constant };
58} 62}
59 63
60# action code -> string mapping 64# action code -> string mapping
61sub straction($) { 65sub straction($) {
99=head1 SYNOPSIS 103=head1 SYNOPSIS
100 104
101 use Convert::UUlib ':all'; 105 use Convert::UUlib ':all';
102 106
103 # read all the files named on the commandline and decode them 107 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example.
104 LoadFile($_) for @ARGV; 109 LoadFile $_ for @ARGV;
105 for($i=0; $uu=GetFileListItem($i); $i++) { 110 for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
106 $uu->decode if $uu->state & FILE_OK; 111 if ($uu->state & FILE_OK) {
112 $uu->decode;
113 print $uu->filename, "\n";
114 }
107 } 115 }
108 116
109=head1 DESCRIPTION 117=head1 DESCRIPTION
110 118
111Read the file doc/library.pdf from the distribution for in-depth 119Read the file doc/library.pdf from the distribution for in-depth
148 OPT_PROGRESS retrieve progress information 156 OPT_PROGRESS retrieve progress information
149 OPT_USETEXT handle text messages 157 OPT_USETEXT handle text messages
150 OPT_PREAMB handle Mime preambles/epilogues 158 OPT_PREAMB handle Mime preambles/epilogues
151 OPT_TINYB64 detect short B64 outside of Mime 159 OPT_TINYB64 detect short B64 outside of Mime
152 OPT_ENCEXT extension for single-part encoded files 160 OPT_ENCEXT extension for single-part encoded files
153 OPT_REMOVE remove input files after decoding 161 OPT_REMOVE remove input files after decoding (dangerous)
154 OPT_MOREMIME strict MIME adherence 162 OPT_MOREMIME strict MIME adherence
155 OPT_DOTDOT .. unescaping has not yet been done on input files 163 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL*
165 OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL*
156 166
157=head2 Result/Error codes 167=head2 Result/Error codes
158 168
159 RET_OK everything went fine 169 RET_OK everything went fine
160 RET_IOERR I/O Error - examine errno 170 RET_IOERR I/O Error - examine errno
171 181
172 This code is zero, i.e. "false": 182 This code is zero, i.e. "false":
173 183
174 UUFILE_READ Read in, but not further processed 184 UUFILE_READ Read in, but not further processed
175 185
176 The following state codes are ored together: 186 The following state codes are or'ed together:
177 187
178 FILE_MISPART Missing Part(s) detected 188 FILE_MISPART Missing Part(s) detected
179 FILE_NOBEGIN No 'begin' found 189 FILE_NOBEGIN No 'begin' found
180 FILE_NOEND No 'end' found 190 FILE_NOEND No 'end' found
181 FILE_NODATA File does not contain valid uudata 191 FILE_NODATA File does not contain valid uudata
197=head1 EXPORTED FUNCTIONS 207=head1 EXPORTED FUNCTIONS
198 208
199=head2 Initializing and cleanup 209=head2 Initializing and cleanup
200 210
201Initialize is automatically called when the module is loaded and allocates 211Initialize is automatically called when the module is loaded and allocates
202quite a bit of memory. CleanUp releases that again. 212quite a small amount of memory for today's machines ;) CleanUp releases that
213again.
203 214
204 Initialize; # not normally necessary 215On my machine, a fairly complete decode with DBI backend needs about 10MB
216RSS to decode 20000 files.
217
218=over 4
219
220=item Initialize
221
222Not normally necessary, (re-)initializes the library.
223
224=item CleanUp
225
205 CleanUp; # could be called at the end to release memory 226Not normally necessary, could be called at the end to release memory
227before starting a new decoding round.
228
229=back
206 230
207=head2 Setting and querying options 231=head2 Setting and querying options
208 232
233=over 4
234
209 $option = GetOption OPT_xxx; 235=item $option = GetOption OPT_xxx
236
210 SetOption OPT_xxx, opt-value; 237=item SetOption OPT_xxx, opt-value
238
239=back
240
241See the C<OPT_xxx> constants above to see which options exist.
211 242
212=head2 Setting various callbacks 243=head2 Setting various callbacks
213 244
245=over 4
246
214 SetMsgCallback [callback-function]; 247=item SetMsgCallback [callback-function]
248
215 SetBusyCallback [callback-function]; 249=item SetBusyCallback [callback-function]
250
216 SetFileCallback [callback-function]; 251=item SetFileCallback [callback-function]
252
217 SetFNameFilter [callback-function]; 253=item SetFNameFilter [callback-function]
254
255=back
218 256
219=head2 Call the currently selected FNameFilter 257=head2 Call the currently selected FNameFilter
220 258
259=over 4
260
221 $file = FNameFilter $file; 261=item $file = FNameFilter $file
262
263=back
222 264
223=head2 Loading sourcefiles, optionally fuzzy merge and start decoding 265=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
224 266
267=over 4
268
225 ($retval, $count) = LoadFile $fname, [$id, [$delflag]]; 269=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
270
271Load the given file and scan it for encoded contents. Optionally tag it
272with the given id, and if C<$delflag> is true, delete the file after it
273is no longer necessary. If you are certain of the part number, you can
274specify it as the last argument.
275
276A better (usually faster) way of doing this is using the C<SetFNameFilter>
277functionality.
278
226 $retval = Smerge $pass; 279=item $retval = Smerge $pass
280
281If you are desperate, try to call C<Smerge> with increasing C<$pass>
282values, beginning at C<0>, to try to merge parts that usually would not
283have been merged.
284
285Most probably this will result in garbled files, so never do this by
286default.
287
227 $item = GetFileListItem $item_number; 288=item $item = GetFileListItem $item_number
228 289
229=head2 The procedural interface is undocumented, use the following methods instead 290Return the C<$item> structure for the C<$item_number>'th found file, or
291C<undef> if no file with that number exists.
230 292
293The first file has number C<0>, and the series has no holes, so you can
294iterate over all files by starting with zero and incrementing until you
295hit C<undef>.
296
297=back
298
299=head2 Decoding files
300
301=over 4
302
231 $retval = $item->rename($newname); 303=item $retval = $item->rename($newname)
304
305Change the ondisk filename where the decoded file will be saved.
306
232 $retval = $item->decode_temp; 307=item $retval = $item->decode_temp
308
309Decode the file into a temporary location, use C<< $item->infile >> to
310retrieve the temporary filename.
311
233 $retval = $item->remove_temp; 312=item $retval = $item->remove_temp
313
314Remove the temporarily decoded file again.
315
234 $retval = $item->decode([$target_path]); 316=item $retval = $item->decode([$target_path])
317
318Decode the file to its destination, or the given target path.
319
235 $retval = $item->info(callback-function); 320=item $retval = $item->info(callback-function)
321
322=back
236 323
237=head2 Querying (and setting) item attributes 324=head2 Querying (and setting) item attributes
238 325
326=over 4
327
239 $state = $item->state; 328=item $state = $item->state
329
240 $mode = $item->mode([newmode]); 330=item $mode = $item->mode([newmode])
331
241 $uudet = $item->uudet; 332=item $uudet = $item->uudet
333
242 $size = $item->size; 334=item $size = $item->size
335
243 $filename = $item->filename([newfilename]); 336=item $filename = $item->filename([newfilename])
337
244 $subfname = $item->subfname; 338=item $subfname = $item->subfname
339
245 $mimeid = $item->mimeid; 340=item $mimeid = $item->mimeid
341
246 $mimetype = $item->mimetype; 342=item $mimetype = $item->mimetype
343
247 $binfile = $item->binfile; 344=item $binfile = $item->binfile
248 345
249=head2 Totally undocumented but well tested ;) 346=back
250 347
348=head2 Information about source parts
349
350=over 4
351
251 $parts = $item->parts; 352=item $parts = $item->parts
353
354Return information about all parts (source files) used to decode the file
355as a list of hashrefs with the following structure:
356
357 {
358 partno => <integer describing the part number, starting with 1>,
359 # the following members only exist when they contain useful information
360 sfname => <local pathname of the file where this part is from>,
361 filename => <the ondisk filename of the decoded file>,
362 subfname => <used to cluster postings, possibly the posting filename>,
363 subject => <the subject of the posting/mail>,
364 origin => <the possible source (From) address>,
365 mimetype => <the possible mimetype of the decoded file>,
366 mimeid => <the id part of the Content-Type>,
367 }
368
369Usually you are interested mostly in the C<sfname> and possibly the C<partno>
370and C<filename> members.
371
372=back
252 373
253=head2 Functions below not documented and not very well tested 374=head2 Functions below not documented and not very well tested
254 375
255 QuickDecode 376 QuickDecode
256 EncodeMulti 377 EncodeMulti
316=head1 LARGE EXAMPLE DECODER 437=head1 LARGE EXAMPLE DECODER
317 438
318This is the file C<example-decoder> from the distribution, put here 439This is the file C<example-decoder> from the distribution, put here
319instead of more thorough documentation. 440instead of more thorough documentation.
320 441
442 #!/usr/bin/perl
443
321 # decode all the files in the directory uusrc/ and copy 444 # decode all the files in the directory uusrc/ and copy
322 # the resulting files to uudst/ 445 # the resulting files to uudst/
323 446
324 use Convert::UUlib ':all'; 447 use Convert::UUlib ':all';
325 448
326 sub namefilter { 449 sub namefilter {
327 my($path)=@_; 450 my ($path) = @_;
451
328 $path=~s/^.*[\/\\]//; 452 $path=~s/^.*[\/\\]//;
453
329 $path; 454 $path
330 } 455 }
331 456
332 sub busycb { 457 sub busycb {
333 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; 458 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
334 $_[0]=straction($action); 459 $_[0]=straction($action);
335 print "busy_callback(", (join ",",@_), ")\n"; 460 print "busy_callback(", (join ",",@_), ")\n";
336 0; 461 0
337 } 462 }
338 463
464 SetOption OPT_RBUF, 128*1024;
465 SetOption OPT_WBUF, 1024*1024;
339 SetOption OPT_IGNMODE, 1; 466 SetOption OPT_IGNMODE, 1;
467 SetOption OPT_IGNMODE, 1;
340 SetOption OPT_VERBOSE, 1; 468 SetOption OPT_VERBOSE, 1;
341 469
342 # show the three ways you can set callback functions. I normally 470 # show the three ways you can set callback functions. I normally
343 # prefer the one with the sub inplace. 471 # prefer the one with the sub inplace.
344 SetFNameFilter \&namefilter; 472 SetFNameFilter \&namefilter;
345 473
346 SetBusyCallback "busycb", 333; 474 SetBusyCallback "busycb", 333;
347 475
348 SetMsgCallback sub { 476 SetMsgCallback sub {
349 my ($msg, $level) = @_; 477 my ($msg, $level) = @_;
350 print uc strmsglevel $_[1], ": $msg\n"; 478 print uc strmsglevel $_[1], ": $msg\n";
351 }; 479 };
352 480
353 # the following non-trivial FileNameCallback takes care 481 # the following non-trivial FileNameCallback takes care
354 # of some subject lines not detected properly by uulib: 482 # of some subject lines not detected properly by uulib:
355 SetFileNameCallback sub { 483 SetFileNameCallback sub {
356 return unless $_[1]; # skip "Re:"-plies et al. 484 return unless $_[1]; # skip "Re:"-plies et al.
357 local $_ = $_[0]; 485 local $_ = $_[0];
358 486
359 # the following rules are rather effective on some newsgroups, 487 # the following rules are rather effective on some newsgroups,
360 # like alt.binaries.games.anime, where non-mime, uuencoded data 488 # like alt.binaries.games.anime, where non-mime, uuencoded data
361 # is very common 489 # is very common
362 490
363 # if we find some *.rar, take it as the filename 491 # if we find some *.rar, take it as the filename
364 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; 492 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
365 493
366 # one common subject format 494 # one common subject format
367 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; 495 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
368 496
369 # - filename.par (04/55) 497 # - filename.par (04/55)
370 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; 498 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
371 499
372 # - (xxx) No. 1 sayuri81.jpg 756565 bytes 500 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
373 # - (20 files) No.17 Roseanne.jpg [2/2] 501 # - (20 files) No.17 Roseanne.jpg [2/2]
374 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; 502 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
375 503
504 # try to detect some common forms of filenames
505 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
506
376 # otherwise just pass what we have 507 # otherwise just pass what we have
377 return (); 508 ()
378 }; 509 };
379 510
380 # now read all files in the directory uusrc/* 511 # now read all files in the directory uusrc/*
381 for(<uusrc/*>) { 512 for(<uusrc/*>) {
382 my($retval,$count)=LoadFile ($_, $_, 1); 513 my ($retval, $count) = LoadFile ($_, $_, 1);
383 print "file($_), status(", strerror $retval, ") parts($count)\n"; 514 print "file($_), status(", strerror $retval, ") parts($count)\n";
384 } 515 }
385 516
386 SetOption OPT_SAVEPATH, "uudst/"; 517 SetOption OPT_SAVEPATH, "uudst/";
387 518
388 # now wade through all files and their source parts 519 # now wade through all files and their source parts
389 $i = 0; 520 $i = 0;
390 while ($uu = GetFileListItem($i)) { 521 while ($uu = GetFileListItem $i) {
391 $i++; 522 $i++;
392 print "file nr. $i"; 523 print "file nr. $i";
393 print " state ", $uu->state; 524 print " state ", $uu->state;
394 print " mode ", $uu->mode; 525 print " mode ", $uu->mode;
395 print " uudet ", strencoding $uu->uudet; 526 print " uudet ", strencoding $uu->uudet;
396 print " size ", $uu->size; 527 print " size ", $uu->size;
397 print " filename ", $uu->filename; 528 print " filename ", $uu->filename;
398 print " subfname ", $uu->subfname; 529 print " subfname ", $uu->subfname;
399 print " mimeid ", $uu->mimeid; 530 print " mimeid ", $uu->mimeid;
400 print " mimetype ", $uu->mimetype; 531 print " mimetype ", $uu->mimetype;
401 print "\n"; 532 print "\n";
402 533
403 # print additional info about all parts 534 # print additional info about all parts
404 for ($uu->parts) { 535 for ($uu->parts) {
405 while (my ($k, $v) = each %$_) { 536 while (my ($k, $v) = each %$_) {
406 print "$k > $v, "; 537 print "$k > $v, ";
407 } 538 }
408 print "\n"; 539 print "\n";
409 } 540 }
410 541
411 $uu->decode_temp; 542 print $uu->filename;
412 print " temporarily decoded to ", $uu->binfile, "\n"; 543
413 $uu->remove_temp; 544 $uu->remove_temp;
414 545
415 print strerror $uu->decode; 546 if (my $err = $uu->decode ()) {
547 print ", ", strerror $err, "\n";
548 } else {
416 print " saved as uudst/", $uu->filename, "\n"; 549 print ", saved as uudst/", $uu->filename, "\n";
417 } 550 }
551 }
418 552
419 print "cleanup...\n"; 553 print "cleanup...\n";
420 554
421 CleanUp(); 555 CleanUp;
422 556
423=head1 AUTHOR 557=head1 AUTHOR
424 558
425Marc Lehmann <pcg@goof.com>, the original uulib library was written 559Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
426by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily 560by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
427bugfixed by Marc Lehmann. 561bugfixed by Marc Lehmann.
428 562
429=head1 SEE ALSO 563=head1 SEE ALSO
430 564

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines