ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
Revision: 1.40
Committed: Wed Dec 15 00:10:59 2010 UTC (13 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-1_34
Changes since 1.39: +1 -1 lines
Log Message:
1.34

File Contents

# Content
1 package Convert::UUlib;
2
3 no warnings;
4 use strict;
5
6 use Carp;
7
8 require Exporter;
9 require DynaLoader;
10
11 our $VERSION = '1.34';
12
13 our @ISA = qw(Exporter DynaLoader);
14
15 our @_consts = qw(
16 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
17
18 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
19 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
20
21 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
22
23 OPT_RBUF OPT_WBUF
24 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
25 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
26 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
27 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK
28
29 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
30 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
31
32 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
33 XX_ENCODED UU_ENCODED YENC_ENCODED
34 );
35
36 our @_funcs = qw(
37 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
38 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
39 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp
40 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
41 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
42 E_PrepPartial
43
44 straction strencoding strmsglevel
45 );
46
47 our @EXPORT = @_consts;
48 our @EXPORT_OK = @_funcs;
49 our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
50
51 bootstrap Convert::UUlib $VERSION;
52
53 Initialize();
54
55 # not when < 5.005_6x
56 # END { CleanUp() }
57
58 for (@_consts) {
59 my $constant = constant($_);
60 no strict 'refs';
61 *$_ = sub () { $constant };
62 }
63
64 # action code -> string mapping
65 sub straction($) {
66 return 'copying' if $_[0] == &ACT_COPYING;
67 return 'decoding' if $_[0] == &ACT_DECODING;
68 return 'encoding' if $_[0] == &ACT_ENCODING;
69 return 'idle' if $_[0] == &ACT_IDLE;
70 return 'scanning' if $_[0] == &ACT_SCANNING;
71 'unknown';
72 }
73
74 # encoding type -> string mapping
75 sub strencoding($) {
76 return 'uuencode' if $_[0] == &UU_ENCODED;
77 return 'base64' if $_[0] == &B64_ENCODED;
78 return 'yenc' if $_[0] == &YENC_ENCODED;
79 return 'binhex' if $_[0] == &BH_ENCODED;
80 return 'plaintext' if $_[0] == &PT_ENCODED;
81 return 'quoted-printable' if $_[0] == &QP_ENCODED;
82 return 'xxencode' if $_[0] == &XX_ENCODED;
83 'unknown';
84 }
85
86 sub strmsglevel($) {
87 return 'message' if $_[0] == &MSG_MESSAGE;
88 return 'note' if $_[0] == &MSG_NOTE;
89 return 'warning' if $_[0] == &MSG_WARNING;
90 return 'error' if $_[0] == &MSG_ERROR;
91 return 'panic' if $_[0] == &MSG_PANIC;
92 return 'fatal' if $_[0] == &MSG_FATAL;
93 'unknown';
94 }
95
96 1;
97 __END__
98
99 =head1 NAME
100
101 Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview).
102
103 =head1 SYNOPSIS
104
105 use Convert::UUlib ':all';
106
107 # read all the files named on the commandline and decode them
108 # into the CURRENT directory. See below for a longer example.
109 LoadFile $_ for @ARGV;
110 for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
111 if ($uu->state & FILE_OK) {
112 $uu->decode;
113 print $uu->filename, "\n";
114 }
115 }
116
117 =head1 DESCRIPTION
118
119 Read the file doc/library.pdf from the distribution for in-depth
120 information about the C-library used in this interface, and the rest of
121 this document and especially the non-trivial decoder program at the end.
122
123 =head1 EXPORTED CONSTANTS
124
125 =head2 Action code constants
126
127 ACT_IDLE we don't do anything
128 ACT_SCANNING scanning an input file
129 ACT_DECODING decoding into a temp file
130 ACT_COPYING copying temp to target
131 ACT_ENCODING encoding a file
132
133 =head2 Message severity levels
134
135 MSG_MESSAGE just a message, nothing important
136 MSG_NOTE something that should be noticed
137 MSG_WARNING important msg, processing continues
138 MSG_ERROR processing has been terminated
139 MSG_FATAL decoder cannot process further requests
140 MSG_PANIC recovery impossible, app must terminate
141
142 =head2 Options
143
144 OPT_VERSION version number MAJOR.MINORplPATCH (ro)
145 OPT_FAST assumes only one part per file
146 OPT_DUMBNESS switch off the program's intelligence
147 OPT_BRACKPOL give numbers in [] higher precendence
148 OPT_VERBOSE generate informative messages
149 OPT_DESPERATE try to decode incomplete files
150 OPT_IGNREPLY ignore RE:plies (off by default)
151 OPT_OVERWRITE whether it's OK to overwrite ex. files
152 OPT_SAVEPATH prefix to save-files on disk
153 OPT_IGNMODE ignore the original file mode
154 OPT_DEBUG print messages with FILE/LINE info
155 OPT_ERRNO get last error code for RET_IOERR (ro)
156 OPT_PROGRESS retrieve progress information
157 OPT_USETEXT handle text messages
158 OPT_PREAMB handle Mime preambles/epilogues
159 OPT_TINYB64 detect short B64 outside of Mime
160 OPT_ENCEXT extension for single-part encoded files
161 OPT_REMOVE remove input files after decoding (dangerous)
162 OPT_MOREMIME strict MIME adherence
163 OPT_DOTDOT ".."-unescaping has not yet been done on input files
164 OPT_RBUF set default read I/O buffer size in bytes
165 OPT_WBUF set default write I/O buffer size in bytes
166 OPT_AUTOCHECK automatically check file list after every loadfile
167
168 =head2 Result/Error codes
169
170 RET_OK everything went fine
171 RET_IOERR I/O Error - examine errno
172 RET_NOMEM not enough memory
173 RET_ILLVAL illegal value for operation
174 RET_NODATA decoder didn't find any data
175 RET_NOEND encoded data wasn't ended properly
176 RET_UNSUP unsupported function (encoding)
177 RET_EXISTS file exists (decoding)
178 RET_CONT continue -- special from ScanPart
179 RET_CANCEL operation canceled
180
181 =head2 File States
182
183 This code is zero, i.e. "false":
184
185 UUFILE_READ Read in, but not further processed
186
187 The following state codes are or'ed together:
188
189 FILE_MISPART Missing Part(s) detected
190 FILE_NOBEGIN No 'begin' found
191 FILE_NOEND No 'end' found
192 FILE_NODATA File does not contain valid uudata
193 FILE_OK All Parts found, ready to decode
194 FILE_ERROR Error while decoding
195 FILE_DECODED Successfully decoded
196 FILE_TMPFILE Temporary decoded file exists
197
198 =head2 Encoding types
199
200 UU_ENCODED UUencoded data
201 B64_ENCODED Mime-Base64 data
202 XX_ENCODED XXencoded data
203 BH_ENCODED Binhex encoded
204 PT_ENCODED Plain-Text encoded (MIME)
205 QP_ENCODED Quoted-Printable (MIME)
206 YENC_ENCODED yEnc encoded (non-MIME)
207
208 =head1 EXPORTED FUNCTIONS
209
210 =head2 Initializing and cleanup
211
212 Initialize is automatically called when the module is loaded and allocates
213 quite a small amount of memory for todays machines ;) CleanUp releases that
214 again.
215
216 On my machine, a fairly complete decode with DBI backend needs about 10MB
217 RSS to decode 20000 files.
218
219 =over 4
220
221 =item Initialize
222
223 Not normally necessary, (re-)initializes the library.
224
225 =item CleanUp
226
227 Not normally necessary, could be called at the end to release memory
228 before starting a new decoding round.
229
230 =back
231
232 =head2 Setting and querying options
233
234 =over 4
235
236 =item $option = GetOption OPT_xxx
237
238 =item SetOption OPT_xxx, opt-value
239
240 =back
241
242 See the C<OPT_xxx> constants above to see which options exist.
243
244 =head2 Setting various callbacks
245
246 =over 4
247
248 =item SetMsgCallback [callback-function]
249
250 =item SetBusyCallback [callback-function]
251
252 =item SetFileCallback [callback-function]
253
254 =item SetFNameFilter [callback-function]
255
256 =back
257
258 =head2 Call the currently selected FNameFilter
259
260 =over 4
261
262 =item $file = FNameFilter $file
263
264 =back
265
266 =head2 Loading sourcefiles, optionally fuzzy merge and start decoding
267
268 =over 4
269
270 =item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
271
272 Load the given file and scan it for encoded contents. Optionally tag it
273 with the given id, and if C<$delflag> is true, delete the file after it
274 is no longer necessary. If you are certain of the part number, you can
275 specify it as the last argument.
276
277 A better (usually faster) way of doing this is using the C<SetFNameFilter>
278 functionality.
279
280 =item $retval = Smerge $pass
281
282 If you are desperate, try to call C<Smerge> with increasing C<$pass>
283 values, beginning at C<0>, to try to merge parts that usually would not
284 have been merged.
285
286 Most probably this will result in garbled files, so never do this by
287 default, except:
288
289 If the C<OPT_AUTOCHECK> option has been disabled (by default it is
290 enabled) to speed up file loading, then you I<have> to call C<Smerge -1>
291 after loading all files as an additional pre-pass (which is normally done
292 by C<LoadFile>).
293
294 =item $item = GetFileListItem $item_number
295
296 Return the C<$item> structure for the C<$item_number>'th found file, or
297 C<undef> of no file with that number exists.
298
299 The first file has number C<0>, and the series has no holes, so you can
300 iterate over all files by starting with zero and incrementing until you
301 hit C<undef>.
302
303 =back
304
305 =head2 Decoding files
306
307 =over 4
308
309 =item $retval = $item->rename($newname)
310
311 Change the ondisk filename where the decoded file will be saved.
312
313 =item $retval = $item->decode_temp
314
315 Decode the file into a temporary location, use C<< $item->infile >> to
316 retrieve the temporary filename.
317
318 =item $retval = $item->remove_temp
319
320 Remove the temporarily decoded file again.
321
322 =item $retval = $item->decode([$target_path])
323
324 Decode the file to it's destination, or the given target path.
325
326 =item $retval = $item->info(callback-function)
327
328 =back
329
330 =head2 Querying (and setting) item attributes
331
332 =over 4
333
334 =item $state = $item->state
335
336 =item $mode = $item->mode([newmode])
337
338 =item $uudet = $item->uudet
339
340 =item $size = $item->size
341
342 =item $filename = $item->filename([newfilename})
343
344 =item $subfname = $item->subfname
345
346 =item $mimeid = $item->mimeid
347
348 =item $mimetype = $item->mimetype
349
350 =item $binfile = $item->binfile
351
352 =back
353
354 =head2 Information about source parts
355
356 =over 4
357
358 =item $parts = $item->parts
359
360 Return information about all parts (source files) used to decode the file
361 as a list of hashrefs with the following structure:
362
363 {
364 partno => <integer describing the part number, starting with 1>,
365 # the following member sonly exist when they contain useful information
366 sfname => <local pathname of the file where this part is from>,
367 filename => <the ondisk filename of the decoded file>,
368 subfname => <used to cluster postings, possibly the posting filename>,
369 subject => <the subject of the posting/mail>,
370 origin => <the possible source (From) address>,
371 mimetype => <the possible mimetype of the decoded file>,
372 mimeid => <the id part of the Content-Type>,
373 }
374
375 Usually you are interested mostly the C<sfname> and possibly the C<partno>
376 and C<filename> members.
377
378 =back
379
380 =head2 Functions below not documented and not very well tested
381
382 QuickDecode
383 EncodeMulti
384 EncodePartial
385 EncodeToStream
386 EncodeToFile
387 E_PrepSingle
388 E_PrepPartial
389
390 =head2 EXTENSION FUNCTIONS
391
392 Functions found in this module but not documented in the uulib documentation:
393
394 =over 4
395
396 =item $msg = straction ACT_xxx
397
398 Return a human readable string representing the given action code.
399
400 =item $msg = strerror RET_xxx
401
402 Return a human readable string representing the given error code.
403
404 =item $str = strencoding xxx_ENCODED
405
406 Return the name of the encoding type as a string.
407
408 =item $str = strmsglevel MSG_xxx
409
410 Returns the message level as a string.
411
412 =item SetFileNameCallback $cb
413
414 Sets (or queries) the FileNameCallback, which is called whenever the
415 decoding library can't find a filename and wants to extract a filename
416 from the subject line of a posting. The callback will be called with
417 two arguments, the subject line and the current candidate for the
418 filename. The latter argument can be C<undef>, which means that no
419 filename could be found (and likely no one exists, so it is safe to also
420 return C<undef> in this case). If it doesn't return anything (not even
421 C<undef>!), then nothing happens, so this is a no-op callback:
422
423 sub cb {
424 return ();
425 }
426
427 If it returns C<undef>, then this indicates that no filename could be
428 found. In all other cases, the return value is taken to be the filename.
429
430 This is a slightly more useful callback:
431
432 sub cb {
433 return unless $_[1]; # skip "Re:"-plies et al.
434 my ($subject, $filename) = @_;
435 # if we find some *.rar, take it
436 return $1 if $subject =~ /(\w+\.rar)/;
437 # otherwise just pass what we have
438 return ();
439 }
440
441 =back
442
443 =head1 LARGE EXAMPLE DECODER
444
445 This is the file C<example-decoder> from the distribution, put here
446 instead of more thorough documentation.
447
448 #!/usr/bin/perl
449
450 # decode all the files in the directory uusrc/ and copy
451 # the resulting files to uudst/
452
453 use Convert::UUlib ':all';
454
455 sub namefilter {
456 my ($path) = @_;
457
458 $path=~s/^.*[\/\\]//;
459
460 $path
461 }
462
463 sub busycb {
464 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
465 $_[0]=straction($action);
466 print "busy_callback(", (join ",",@_), ")\n";
467 0
468 }
469
470 SetOption OPT_RBUF, 128*1024;
471 SetOption OPT_WBUF, 1024*1024;
472 SetOption OPT_IGNMODE, 1;
473 SetOption OPT_IGNMODE, 1;
474 SetOption OPT_VERBOSE, 1;
475
476 # show the three ways you can set callback functions. I normally
477 # prefer the one with the sub inplace.
478 SetFNameFilter \&namefilter;
479
480 SetBusyCallback "busycb", 333;
481
482 SetMsgCallback sub {
483 my ($msg, $level) = @_;
484 print uc strmsglevel $_[1], ": $msg\n";
485 };
486
487 # the following non-trivial FileNameCallback takes care
488 # of some subject lines not detected properly by uulib:
489 SetFileNameCallback sub {
490 return unless $_[1]; # skip "Re:"-plies et al.
491 local $_ = $_[0];
492
493 # the following rules are rather effective on some newsgroups,
494 # like alt.binaries.games.anime, where non-mime, uuencoded data
495 # is very common
496
497 # if we find some *.rar, take it as the filename
498 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
499
500 # one common subject format
501 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
502
503 # - filename.par (04/55)
504 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
505
506 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
507 # - (20 files) No.17 Roseanne.jpg [2/2]
508 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
509
510 # try to detect some common forms of filenames
511 return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
512
513 # otherwise just pass what we have
514 ()
515 };
516
517 # now read all files in the directory uusrc/*
518 for(<uusrc/*>) {
519 my ($retval, $count) = LoadFile ($_, $_, 1);
520 print "file($_), status(", strerror $retval, ") parts($count)\n";
521 }
522
523 SetOption OPT_SAVEPATH, "uudst/";
524
525 # now wade through all files and their source parts
526 $i = 0;
527 while ($uu = GetFileListItem $i) {
528 $i++;
529 print "file nr. $i";
530 print " state ", $uu->state;
531 print " mode ", $uu->mode;
532 print " uudet ", strencoding $uu->uudet;
533 print " size ", $uu->size;
534 print " filename ", $uu->filename;
535 print " subfname ", $uu->subfname;
536 print " mimeid ", $uu->mimeid;
537 print " mimetype ", $uu->mimetype;
538 print "\n";
539
540 # print additional info about all parts
541 for ($uu->parts) {
542 while (my ($k, $v) = each %$_) {
543 print "$k > $v, ";
544 }
545 print "\n";
546 }
547
548 print $uu->filename;
549
550 $uu->remove_temp;
551
552 if (my $err = $uu->decode ()) {
553 print ", ", strerror $err, "\n";
554 } else {
555 print ", saved as uudst/", $uu->filename, "\n";
556 }
557 }
558
559 print "cleanup...\n";
560
561 CleanUp;
562
563 =head1 AUTHOR
564
565 Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
566 by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
567 bugfixed by Marc Lehmann.
568
569 =head1 SEE ALSO
570
571 perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/.
572
573 =cut