ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
Revision: 1.25
Committed: Mon May 2 19:58:40 2005 UTC (19 years ago) by root
Branch: MAIN
CVS Tags: rel-1_06
Changes since 1.24: +3 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 package Convert::UUlib;
2    
3     use Carp;
4    
5     require Exporter;
6     require DynaLoader;
7    
8 root 1.24 $VERSION = "1.06";
9 root 1.1
10     @ISA = qw(Exporter DynaLoader);
11    
12     @_consts = qw(
13     ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14    
15     FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16     FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17    
18     MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
19    
20 root 1.25 OPT_RBUF OPT_WBUF
21 root 1.1 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22     OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23     OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 root 1.8 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT
25 root 1.1
26     RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
27     RET_NOEND RET_NOMEM RET_OK RET_UNSUP
28    
29 root 1.11 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 root 1.5 XX_ENCODED UU_ENCODED YENC_ENCODED
31 root 1.1 );
32    
33     @_funcs = qw(
34 root 1.6 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
35     SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
36     FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp
37     RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
38     EncodePartial EncodeToStream EncodeToFile E_PrepSingle
39     E_PrepPartial
40 root 1.1
41     straction strencoding strmsglevel
42     );
43    
44     @EXPORT = @_consts;
45     @EXPORT_OK = @_funcs;
46     %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
47    
48     bootstrap Convert::UUlib $VERSION;
49    
50     Initialize();
51    
52     # not when < 5.005_6x
53     # END { CleanUp() }
54    
55     for (@_consts) {
56     my $constant = constant($_);
57     *$_ = sub () { $constant };
58     }
59    
60     # action code -> string mapping
61     sub straction($) {
62     return 'copying' if $_[0] == &ACT_COPYING;
63     return 'decoding' if $_[0] == &ACT_DECODING;
64     return 'encoding' if $_[0] == &ACT_ENCODING;
65     return 'idle' if $_[0] == &ACT_IDLE;
66     return 'scanning' if $_[0] == &ACT_SCANNING;
67 root 1.5 'unknown';
68 root 1.1 }
69    
70     # encoding type -> string mapping
71     sub strencoding($) {
72 root 1.5 return 'uuencode' if $_[0] == &UU_ENCODED;
73 root 1.11 return 'base64' if $_[0] == &B64_ENCODED;
74 root 1.5 return 'yenc' if $_[0] == &YENC_ENCODED;
75 root 1.1 return 'binhex' if $_[0] == &BH_ENCODED;
76     return 'plaintext' if $_[0] == &PT_ENCODED;
77     return 'quoted-printable' if $_[0] == &QP_ENCODED;
78     return 'xxencode' if $_[0] == &XX_ENCODED;
79 root 1.5 'unknown';
80 root 1.1 }
81    
82     sub strmsglevel($) {
83     return 'message' if $_[0] == &MSG_MESSAGE;
84     return 'note' if $_[0] == &MSG_NOTE;
85     return 'warning' if $_[0] == &MSG_WARNING;
86     return 'error' if $_[0] == &MSG_ERROR;
87     return 'panic' if $_[0] == &MSG_PANIC;
88     return 'fatal' if $_[0] == &MSG_FATAL;
89 root 1.5 'unknown';
90 root 1.1 }
91    
92     1;
93     __END__
94    
95     =head1 NAME
96    
97     Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview).
98    
99     =head1 SYNOPSIS
100    
101     use Convert::UUlib ':all';
102    
103 root 1.11 # read all the files named on the commandline and decode them
104 root 1.13 # into the CURRENT directory. See below for a longer example.
105     LoadFile $_ for @ARGV;
106     for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
107     if ($uu->state & FILE_OK) {
108     $uu->decode;
109     print $uu->filename, "\n";
110     }
111 root 1.1 }
112    
113 root 1.11 =head1 DESCRIPTION
114 root 1.1
115 root 1.11 Read the file doc/library.pdf from the distribution for in-depth
116     information about the C-library used in this interface, and the rest of
117     this document and especially the non-trivial decoder program at the end.
118    
119     =head1 EXPORTED CONSTANTS
120    
121     =head2 Action code constants
122    
123     ACT_IDLE we don't do anything
124     ACT_SCANNING scanning an input file
125     ACT_DECODING decoding into a temp file
126     ACT_COPYING copying temp to target
127     ACT_ENCODING encoding a file
128    
129     =head2 Message severity levels
130    
131     MSG_MESSAGE just a message, nothing important
132     MSG_NOTE something that should be noticed
133     MSG_WARNING important msg, processing continues
134     MSG_ERROR processing has been terminated
135     MSG_FATAL decoder cannot process further requests
136     MSG_PANIC recovery impossible, app must terminate
137    
138     =head2 Options
139    
140     OPT_VERSION version number MAJOR.MINORplPATCH (ro)
141     OPT_FAST assumes only one part per file
142     OPT_DUMBNESS switch off the program's intelligence
143     OPT_BRACKPOL give numbers in [] higher precendence
144     OPT_VERBOSE generate informative messages
145     OPT_DESPERATE try to decode incomplete files
146     OPT_IGNREPLY ignore RE:plies (off by default)
147     OPT_OVERWRITE whether it's OK to overwrite ex. files
148     OPT_SAVEPATH prefix to save-files on disk
149     OPT_IGNMODE ignore the original file mode
150     OPT_DEBUG print messages with FILE/LINE info
151     OPT_ERRNO get last error code for RET_IOERR (ro)
152     OPT_PROGRESS retrieve progress information
153     OPT_USETEXT handle text messages
154     OPT_PREAMB handle Mime preambles/epilogues
155     OPT_TINYB64 detect short B64 outside of Mime
156     OPT_ENCEXT extension for single-part encoded files
157 root 1.14 OPT_REMOVE remove input files after decoding (dangerous)
158 root 1.11 OPT_MOREMIME strict MIME adherence
159 root 1.15 OPT_DOTDOT ".."-unescaping has not yet been done on input files
160 root 1.25 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL*
161     OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL*
162 root 1.11
163     =head2 Result/Error codes
164    
165     RET_OK everything went fine
166     RET_IOERR I/O Error - examine errno
167     RET_NOMEM not enough memory
168     RET_ILLVAL illegal value for operation
169     RET_NODATA decoder didn't find any data
170     RET_NOEND encoded data wasn't ended properly
171     RET_UNSUP unsupported function (encoding)
172     RET_EXISTS file exists (decoding)
173     RET_CONT continue -- special from ScanPart
174     RET_CANCEL operation canceled
175    
176     =head2 File States
177    
178     This code is zero, i.e. "false":
179    
180     UUFILE_READ Read in, but not further processed
181    
182 root 1.13 The following state codes are or'ed together:
183 root 1.11
184     FILE_MISPART Missing Part(s) detected
185     FILE_NOBEGIN No 'begin' found
186     FILE_NOEND No 'end' found
187     FILE_NODATA File does not contain valid uudata
188     FILE_OK All Parts found, ready to decode
189     FILE_ERROR Error while decoding
190     FILE_DECODED Successfully decoded
191     FILE_TMPFILE Temporary decoded file exists
192    
193     =head2 Encoding types
194    
195     UU_ENCODED UUencoded data
196     B64_ENCODED Mime-Base64 data
197     XX_ENCODED XXencoded data
198     BH_ENCODED Binhex encoded
199     PT_ENCODED Plain-Text encoded (MIME)
200     QP_ENCODED Quoted-Printable (MIME)
201     YENC_ENCODED yEnc encoded (non-MIME)
202 root 1.1
203 root 1.11 =head1 EXPORTED FUNCTIONS
204 root 1.1
205 root 1.11 =head2 Initializing and cleanup
206 root 1.1
207 root 1.11 Initialize is automatically called when the module is loaded and allocates
208 root 1.13 quite a small amount of memory for todays machines ;) CleanUp releases that
209     again.
210 root 1.14
211     On my machine, a fairly complete decode with DBI backend needs about 10MB
212     RSS to decode 20000 files.
213 root 1.1
214 root 1.13 =over 4
215    
216     =item Initialize
217    
218     Not normally necessary, (re-)initializes the library.
219    
220     =item CleanUp
221    
222     Not normally necessary, could be called at the end to release memory
223     before starting a new decoding round.
224    
225     =back
226 root 1.1
227 root 1.11 =head2 Setting and querying options
228 root 1.1
229 root 1.13 =over 4
230    
231     =item $option = GetOption OPT_xxx
232    
233     =item SetOption OPT_xxx, opt-value
234    
235     =back
236    
237     See the C<OPT_xxx> constants above to see which options exist.
238 root 1.1
239 root 1.11 =head2 Setting various callbacks
240 root 1.1
241 root 1.13 =over 4
242    
243     =item SetMsgCallback [callback-function]
244    
245     =item SetBusyCallback [callback-function]
246    
247     =item SetFileCallback [callback-function]
248    
249     =item SetFNameFilter [callback-function]
250    
251     =back
252 root 1.1
253 root 1.11 =head2 Call the currently selected FNameFilter
254 root 1.1
255 root 1.13 =over 4
256    
257     =item $file = FNameFilter $file
258    
259     =back
260 root 1.1
261 root 1.11 =head2 Loading sourcefiles, optionally fuzzy merge and start decoding
262 root 1.1
263 root 1.13 =over 4
264    
265 root 1.16 =item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
266 root 1.13
267     Load the given file and scan it for encoded contents. Optionally tag it
268 root 1.16 with the given id, and if C<$delflag> is true, delete the file after it
269 root 1.18 is no longer necessary. If you are certain of the part number, you can
270     specify it as the last argument.
271 root 1.16
272     A better (usually faster) way of doing this is using the C<SetFNameFilter>
273     functionality.
274 root 1.13
275     =item $retval = Smerge $pass
276    
277     If you are desperate, try to call C<Smerge> with increasing C<$pass>
278     values, beginning at C<0>, to try to merge parts that usually would not
279     have been merged.
280    
281     Most probably this will result in garbled files, so never do this by
282     default.
283    
284     =item $item = GetFileListItem $item_number
285    
286     Return the C<$item> structure for the C<$item_number>'th found file, or
287     C<undef> of no file with that number exists.
288    
289     The first file has number C<0>, and the series has no holes, so you can
290     iterate over all files by starting with zero and incrementing until you
291     hit C<undef>.
292    
293     =back
294    
295     =head2 Decoding files
296    
297     =over 4
298    
299     =item $retval = $item->rename($newname)
300    
301     Change the ondisk filename where the decoded file will be saved.
302    
303     =item $retval = $item->decode_temp
304    
305     Decode the file into a temporary location, use C<< $item->infile >> to
306     retrieve the temporary filename.
307    
308     =item $retval = $item->remove_temp
309    
310     Remove the temporarily decoded file again.
311    
312     =item $retval = $item->decode([$target_path])
313    
314     Decode the file to it's destination, or the given target path.
315    
316     =item $retval = $item->info(callback-function)
317    
318     =back
319 root 1.1
320 root 1.11 =head2 Querying (and setting) item attributes
321 root 1.1
322 root 1.13 =over 4
323    
324     =item $state = $item->state
325    
326     =item $mode = $item->mode([newmode])
327    
328     =item $uudet = $item->uudet
329    
330     =item $size = $item->size
331    
332     =item $filename = $item->filename([newfilename})
333    
334     =item $subfname = $item->subfname
335    
336     =item $mimeid = $item->mimeid
337    
338     =item $mimetype = $item->mimetype
339    
340     =item $binfile = $item->binfile
341    
342     =back
343    
344     =head2 Information about source parts
345    
346     =over 4
347    
348     =item $parts = $item->parts
349    
350     Return information about all parts (source files) used to decode the file
351     as a list of hashrefs with the following structure:
352    
353     {
354     partno => <integer describing the part number, starting with 1>,
355     # the following member sonly exist when they contain useful information
356     sfname => <local pathname of the file where this part is from>,
357     filename => <the ondisk filename of the decoded file>,
358     subfname => <used to cluster postings, possibly the posting filename>,
359     subject => <the subject of the posting/mail>,
360     origin => <the possible source (From) address>,
361     mimetype => <the possible mimetype of the decoded file>,
362     mimeid => <the id part of the Content-Type>,
363     }
364 root 1.1
365 root 1.13 Usually you are interested mostly the C<sfname> and possibly the C<partno>
366     and C<filename> members.
367 root 1.1
368 root 1.13 =back
369 root 1.1
370 root 1.11 =head2 Functions below not documented and not very well tested
371 root 1.1
372 root 1.11 QuickDecode
373     EncodeMulti
374     EncodePartial
375     EncodeToStream
376     EncodeToFile
377     E_PrepSingle
378     E_PrepPartial
379 root 1.6
380     =head2 EXTENSION FUNCTIONS
381    
382     Functions found in this module but not documented in the uulib documentation:
383    
384     =over 4
385    
386 root 1.11 =item $msg = straction ACT_xxx
387    
388     Return a human readable string representing the given action code.
389    
390     =item $msg = strerror RET_xxx
391    
392     Return a human readable string representing the given error code.
393    
394     =item $str = strencoding xxx_ENCODED
395    
396     Return the name of the encoding type as a string.
397    
398     =item $str = strmsglevel MSG_xxx
399    
400     Returns the message level as a string.
401    
402 root 1.6 =item SetFileNameCallback $cb
403    
404     Sets (or queries) the FileNameCallback, which is called whenever the
405     decoding library can't find a filename and wants to extract a filename
406     from the subject line of a posting. The callback will be called with
407     two arguments, the subject line and the current candidate for the
408     filename. The latter argument can be C<undef>, which means that no
409     filename could be found (and likely no one exists, so it is safe to also
410     return C<undef> in this case). If it doesn't return anything (not even
411     C<undef>!), then nothing happens, so this is a no-op callback:
412    
413     sub cb {
414     return ();
415     }
416    
417     If it returns C<undef>, then this indicates that no filename could be
418     found. In all other cases, the return value is taken to be the filename.
419    
420     This is a slightly more useful callback:
421    
422     sub cb {
423     return unless $_[1]; # skip "Re:"-plies et al.
424     my ($subject, $filename) = @_;
425     # if we find some *.rar, take it
426     return $1 if $subject =~ /(\w+\.rar)/;
427     # otherwise just pass what we have
428     return ();
429     }
430    
431     =back
432 root 1.1
433 root 1.11 =head1 LARGE EXAMPLE DECODER
434    
435     This is the file C<example-decoder> from the distribution, put here
436     instead of more thorough documentation.
437    
438     # decode all the files in the directory uusrc/ and copy
439     # the resulting files to uudst/
440    
441     use Convert::UUlib ':all';
442    
443     sub namefilter {
444     my($path)=@_;
445     $path=~s/^.*[\/\\]//;
446     $path;
447     }
448    
449     sub busycb {
450     my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
451     $_[0]=straction($action);
452     print "busy_callback(", (join ",",@_), ")\n";
453     0;
454     }
455    
456     SetOption OPT_IGNMODE, 1;
457     SetOption OPT_VERBOSE, 1;
458    
459     # show the three ways you can set callback functions. I normally
460     # prefer the one with the sub inplace.
461     SetFNameFilter \&namefilter;
462    
463     SetBusyCallback "busycb", 333;
464    
465     SetMsgCallback sub {
466     my ($msg, $level) = @_;
467     print uc strmsglevel $_[1], ": $msg\n";
468     };
469    
470     # the following non-trivial FileNameCallback takes care
471     # of some subject lines not detected properly by uulib:
472     SetFileNameCallback sub {
473     return unless $_[1]; # skip "Re:"-plies et al.
474     local $_ = $_[0];
475    
476     # the following rules are rather effective on some newsgroups,
477     # like alt.binaries.games.anime, where non-mime, uuencoded data
478     # is very common
479    
480     # if we find some *.rar, take it as the filename
481     return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
482    
483     # one common subject format
484     return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
485    
486     # - filename.par (04/55)
487     return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
488    
489     # - (xxx) No. 1 sayuri81.jpg 756565 bytes
490     # - (20 files) No.17 Roseanne.jpg [2/2]
491     return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
492    
493     # otherwise just pass what we have
494     return ();
495     };
496    
497     # now read all files in the directory uusrc/*
498     for(<uusrc/*>) {
499     my($retval,$count)=LoadFile ($_, $_, 1);
500     print "file($_), status(", strerror $retval, ") parts($count)\n";
501     }
502    
503     SetOption OPT_SAVEPATH, "uudst/";
504    
505     # now wade through all files and their source parts
506     $i = 0;
507     while ($uu = GetFileListItem($i)) {
508     $i++;
509     print "file nr. $i";
510     print " state ", $uu->state;
511     print " mode ", $uu->mode;
512     print " uudet ", strencoding $uu->uudet;
513     print " size ", $uu->size;
514     print " filename ", $uu->filename;
515     print " subfname ", $uu->subfname;
516     print " mimeid ", $uu->mimeid;
517     print " mimetype ", $uu->mimetype;
518     print "\n";
519    
520     # print additional info about all parts
521     for ($uu->parts) {
522     while (my ($k, $v) = each %$_) {
523     print "$k > $v, ";
524     }
525     print "\n";
526     }
527    
528     $uu->decode_temp;
529     print " temporarily decoded to ", $uu->binfile, "\n";
530     $uu->remove_temp;
531    
532     print strerror $uu->decode;
533     print " saved as uudst/", $uu->filename, "\n";
534     }
535    
536     print "cleanup...\n";
537    
538     CleanUp();
539    
540 root 1.1 =head1 AUTHOR
541    
542 root 1.23 Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
543 root 1.11 by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
544     bugfixed by Marc Lehmann.
545 root 1.1
546     =head1 SEE ALSO
547    
548     perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/.
549    
550     =cut