ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
Revision: 1.26
Committed: Sun Dec 10 16:44:00 2006 UTC (17 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-0_7
Changes since 1.25: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 package Convert::UUlib;
2
3 use Carp;
4
5 require Exporter;
6 require DynaLoader;
7
8 $VERSION = "1.07";
9
10 @ISA = qw(Exporter DynaLoader);
11
12 @_consts = qw(
13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
14
15 FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
16 FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
17
18 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
19
20 OPT_RBUF OPT_WBUF
21 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT
25
26 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
27 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
28
29 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 XX_ENCODED UU_ENCODED YENC_ENCODED
31 );
32
33 @_funcs = qw(
34 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
35 SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
36 FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp
37 RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
38 EncodePartial EncodeToStream EncodeToFile E_PrepSingle
39 E_PrepPartial
40
41 straction strencoding strmsglevel
42 );
43
44 @EXPORT = @_consts;
45 @EXPORT_OK = @_funcs;
46 %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
47
48 bootstrap Convert::UUlib $VERSION;
49
50 Initialize();
51
52 # not when < 5.005_6x
53 # END { CleanUp() }
54
55 for (@_consts) {
56 my $constant = constant($_);
57 *$_ = sub () { $constant };
58 }
59
60 # action code -> string mapping
61 sub straction($) {
62 return 'copying' if $_[0] == &ACT_COPYING;
63 return 'decoding' if $_[0] == &ACT_DECODING;
64 return 'encoding' if $_[0] == &ACT_ENCODING;
65 return 'idle' if $_[0] == &ACT_IDLE;
66 return 'scanning' if $_[0] == &ACT_SCANNING;
67 'unknown';
68 }
69
70 # encoding type -> string mapping
71 sub strencoding($) {
72 return 'uuencode' if $_[0] == &UU_ENCODED;
73 return 'base64' if $_[0] == &B64_ENCODED;
74 return 'yenc' if $_[0] == &YENC_ENCODED;
75 return 'binhex' if $_[0] == &BH_ENCODED;
76 return 'plaintext' if $_[0] == &PT_ENCODED;
77 return 'quoted-printable' if $_[0] == &QP_ENCODED;
78 return 'xxencode' if $_[0] == &XX_ENCODED;
79 'unknown';
80 }
81
82 sub strmsglevel($) {
83 return 'message' if $_[0] == &MSG_MESSAGE;
84 return 'note' if $_[0] == &MSG_NOTE;
85 return 'warning' if $_[0] == &MSG_WARNING;
86 return 'error' if $_[0] == &MSG_ERROR;
87 return 'panic' if $_[0] == &MSG_PANIC;
88 return 'fatal' if $_[0] == &MSG_FATAL;
89 'unknown';
90 }
91
92 1;
93 __END__
94
95 =head1 NAME
96
97 Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview).
98
99 =head1 SYNOPSIS
100
101 use Convert::UUlib ':all';
102
103 # read all the files named on the commandline and decode them
104 # into the CURRENT directory. See below for a longer example.
105 LoadFile $_ for @ARGV;
106 for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
107 if ($uu->state & FILE_OK) {
108 $uu->decode;
109 print $uu->filename, "\n";
110 }
111 }
112
113 =head1 DESCRIPTION
114
115 Read the file doc/library.pdf from the distribution for in-depth
116 information about the C-library used in this interface, and the rest of
117 this document and especially the non-trivial decoder program at the end.
118
119 =head1 EXPORTED CONSTANTS
120
121 =head2 Action code constants
122
123 ACT_IDLE we don't do anything
124 ACT_SCANNING scanning an input file
125 ACT_DECODING decoding into a temp file
126 ACT_COPYING copying temp to target
127 ACT_ENCODING encoding a file
128
129 =head2 Message severity levels
130
131 MSG_MESSAGE just a message, nothing important
132 MSG_NOTE something that should be noticed
133 MSG_WARNING important msg, processing continues
134 MSG_ERROR processing has been terminated
135 MSG_FATAL decoder cannot process further requests
136 MSG_PANIC recovery impossible, app must terminate
137
138 =head2 Options
139
140 OPT_VERSION version number MAJOR.MINORplPATCH (ro)
141 OPT_FAST assumes only one part per file
142 OPT_DUMBNESS switch off the program's intelligence
143 OPT_BRACKPOL give numbers in [] higher precendence
144 OPT_VERBOSE generate informative messages
145 OPT_DESPERATE try to decode incomplete files
146 OPT_IGNREPLY ignore RE:plies (off by default)
147 OPT_OVERWRITE whether it's OK to overwrite ex. files
148 OPT_SAVEPATH prefix to save-files on disk
149 OPT_IGNMODE ignore the original file mode
150 OPT_DEBUG print messages with FILE/LINE info
151 OPT_ERRNO get last error code for RET_IOERR (ro)
152 OPT_PROGRESS retrieve progress information
153 OPT_USETEXT handle text messages
154 OPT_PREAMB handle Mime preambles/epilogues
155 OPT_TINYB64 detect short B64 outside of Mime
156 OPT_ENCEXT extension for single-part encoded files
157 OPT_REMOVE remove input files after decoding (dangerous)
158 OPT_MOREMIME strict MIME adherence
159 OPT_DOTDOT ".."-unescaping has not yet been done on input files
160 OPT_RBUF set default read I/O buffer size in bytes *EXPERIMENTAL*
161 OPT_WBUF set default write I/O buffer size in bytes *EXPERIMENTAL*
162
163 =head2 Result/Error codes
164
165 RET_OK everything went fine
166 RET_IOERR I/O Error - examine errno
167 RET_NOMEM not enough memory
168 RET_ILLVAL illegal value for operation
169 RET_NODATA decoder didn't find any data
170 RET_NOEND encoded data wasn't ended properly
171 RET_UNSUP unsupported function (encoding)
172 RET_EXISTS file exists (decoding)
173 RET_CONT continue -- special from ScanPart
174 RET_CANCEL operation canceled
175
176 =head2 File States
177
178 This code is zero, i.e. "false":
179
180 UUFILE_READ Read in, but not further processed
181
182 The following state codes are or'ed together:
183
184 FILE_MISPART Missing Part(s) detected
185 FILE_NOBEGIN No 'begin' found
186 FILE_NOEND No 'end' found
187 FILE_NODATA File does not contain valid uudata
188 FILE_OK All Parts found, ready to decode
189 FILE_ERROR Error while decoding
190 FILE_DECODED Successfully decoded
191 FILE_TMPFILE Temporary decoded file exists
192
193 =head2 Encoding types
194
195 UU_ENCODED UUencoded data
196 B64_ENCODED Mime-Base64 data
197 XX_ENCODED XXencoded data
198 BH_ENCODED Binhex encoded
199 PT_ENCODED Plain-Text encoded (MIME)
200 QP_ENCODED Quoted-Printable (MIME)
201 YENC_ENCODED yEnc encoded (non-MIME)
202
203 =head1 EXPORTED FUNCTIONS
204
205 =head2 Initializing and cleanup
206
207 Initialize is automatically called when the module is loaded and allocates
208 quite a small amount of memory for todays machines ;) CleanUp releases that
209 again.
210
211 On my machine, a fairly complete decode with DBI backend needs about 10MB
212 RSS to decode 20000 files.
213
214 =over 4
215
216 =item Initialize
217
218 Not normally necessary, (re-)initializes the library.
219
220 =item CleanUp
221
222 Not normally necessary, could be called at the end to release memory
223 before starting a new decoding round.
224
225 =back
226
227 =head2 Setting and querying options
228
229 =over 4
230
231 =item $option = GetOption OPT_xxx
232
233 =item SetOption OPT_xxx, opt-value
234
235 =back
236
237 See the C<OPT_xxx> constants above to see which options exist.
238
239 =head2 Setting various callbacks
240
241 =over 4
242
243 =item SetMsgCallback [callback-function]
244
245 =item SetBusyCallback [callback-function]
246
247 =item SetFileCallback [callback-function]
248
249 =item SetFNameFilter [callback-function]
250
251 =back
252
253 =head2 Call the currently selected FNameFilter
254
255 =over 4
256
257 =item $file = FNameFilter $file
258
259 =back
260
261 =head2 Loading sourcefiles, optionally fuzzy merge and start decoding
262
263 =over 4
264
265 =item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
266
267 Load the given file and scan it for encoded contents. Optionally tag it
268 with the given id, and if C<$delflag> is true, delete the file after it
269 is no longer necessary. If you are certain of the part number, you can
270 specify it as the last argument.
271
272 A better (usually faster) way of doing this is using the C<SetFNameFilter>
273 functionality.
274
275 =item $retval = Smerge $pass
276
277 If you are desperate, try to call C<Smerge> with increasing C<$pass>
278 values, beginning at C<0>, to try to merge parts that usually would not
279 have been merged.
280
281 Most probably this will result in garbled files, so never do this by
282 default.
283
284 =item $item = GetFileListItem $item_number
285
286 Return the C<$item> structure for the C<$item_number>'th found file, or
287 C<undef> of no file with that number exists.
288
289 The first file has number C<0>, and the series has no holes, so you can
290 iterate over all files by starting with zero and incrementing until you
291 hit C<undef>.
292
293 =back
294
295 =head2 Decoding files
296
297 =over 4
298
299 =item $retval = $item->rename($newname)
300
301 Change the ondisk filename where the decoded file will be saved.
302
303 =item $retval = $item->decode_temp
304
305 Decode the file into a temporary location, use C<< $item->infile >> to
306 retrieve the temporary filename.
307
308 =item $retval = $item->remove_temp
309
310 Remove the temporarily decoded file again.
311
312 =item $retval = $item->decode([$target_path])
313
314 Decode the file to it's destination, or the given target path.
315
316 =item $retval = $item->info(callback-function)
317
318 =back
319
320 =head2 Querying (and setting) item attributes
321
322 =over 4
323
324 =item $state = $item->state
325
326 =item $mode = $item->mode([newmode])
327
328 =item $uudet = $item->uudet
329
330 =item $size = $item->size
331
332 =item $filename = $item->filename([newfilename})
333
334 =item $subfname = $item->subfname
335
336 =item $mimeid = $item->mimeid
337
338 =item $mimetype = $item->mimetype
339
340 =item $binfile = $item->binfile
341
342 =back
343
344 =head2 Information about source parts
345
346 =over 4
347
348 =item $parts = $item->parts
349
350 Return information about all parts (source files) used to decode the file
351 as a list of hashrefs with the following structure:
352
353 {
354 partno => <integer describing the part number, starting with 1>,
355 # the following member sonly exist when they contain useful information
356 sfname => <local pathname of the file where this part is from>,
357 filename => <the ondisk filename of the decoded file>,
358 subfname => <used to cluster postings, possibly the posting filename>,
359 subject => <the subject of the posting/mail>,
360 origin => <the possible source (From) address>,
361 mimetype => <the possible mimetype of the decoded file>,
362 mimeid => <the id part of the Content-Type>,
363 }
364
365 Usually you are interested mostly the C<sfname> and possibly the C<partno>
366 and C<filename> members.
367
368 =back
369
370 =head2 Functions below not documented and not very well tested
371
372 QuickDecode
373 EncodeMulti
374 EncodePartial
375 EncodeToStream
376 EncodeToFile
377 E_PrepSingle
378 E_PrepPartial
379
380 =head2 EXTENSION FUNCTIONS
381
382 Functions found in this module but not documented in the uulib documentation:
383
384 =over 4
385
386 =item $msg = straction ACT_xxx
387
388 Return a human readable string representing the given action code.
389
390 =item $msg = strerror RET_xxx
391
392 Return a human readable string representing the given error code.
393
394 =item $str = strencoding xxx_ENCODED
395
396 Return the name of the encoding type as a string.
397
398 =item $str = strmsglevel MSG_xxx
399
400 Returns the message level as a string.
401
402 =item SetFileNameCallback $cb
403
404 Sets (or queries) the FileNameCallback, which is called whenever the
405 decoding library can't find a filename and wants to extract a filename
406 from the subject line of a posting. The callback will be called with
407 two arguments, the subject line and the current candidate for the
408 filename. The latter argument can be C<undef>, which means that no
409 filename could be found (and likely no one exists, so it is safe to also
410 return C<undef> in this case). If it doesn't return anything (not even
411 C<undef>!), then nothing happens, so this is a no-op callback:
412
413 sub cb {
414 return ();
415 }
416
417 If it returns C<undef>, then this indicates that no filename could be
418 found. In all other cases, the return value is taken to be the filename.
419
420 This is a slightly more useful callback:
421
422 sub cb {
423 return unless $_[1]; # skip "Re:"-plies et al.
424 my ($subject, $filename) = @_;
425 # if we find some *.rar, take it
426 return $1 if $subject =~ /(\w+\.rar)/;
427 # otherwise just pass what we have
428 return ();
429 }
430
431 =back
432
433 =head1 LARGE EXAMPLE DECODER
434
435 This is the file C<example-decoder> from the distribution, put here
436 instead of more thorough documentation.
437
438 # decode all the files in the directory uusrc/ and copy
439 # the resulting files to uudst/
440
441 use Convert::UUlib ':all';
442
443 sub namefilter {
444 my($path)=@_;
445 $path=~s/^.*[\/\\]//;
446 $path;
447 }
448
449 sub busycb {
450 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
451 $_[0]=straction($action);
452 print "busy_callback(", (join ",",@_), ")\n";
453 0;
454 }
455
456 SetOption OPT_IGNMODE, 1;
457 SetOption OPT_VERBOSE, 1;
458
459 # show the three ways you can set callback functions. I normally
460 # prefer the one with the sub inplace.
461 SetFNameFilter \&namefilter;
462
463 SetBusyCallback "busycb", 333;
464
465 SetMsgCallback sub {
466 my ($msg, $level) = @_;
467 print uc strmsglevel $_[1], ": $msg\n";
468 };
469
470 # the following non-trivial FileNameCallback takes care
471 # of some subject lines not detected properly by uulib:
472 SetFileNameCallback sub {
473 return unless $_[1]; # skip "Re:"-plies et al.
474 local $_ = $_[0];
475
476 # the following rules are rather effective on some newsgroups,
477 # like alt.binaries.games.anime, where non-mime, uuencoded data
478 # is very common
479
480 # if we find some *.rar, take it as the filename
481 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
482
483 # one common subject format
484 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
485
486 # - filename.par (04/55)
487 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
488
489 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
490 # - (20 files) No.17 Roseanne.jpg [2/2]
491 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
492
493 # otherwise just pass what we have
494 return ();
495 };
496
497 # now read all files in the directory uusrc/*
498 for(<uusrc/*>) {
499 my($retval,$count)=LoadFile ($_, $_, 1);
500 print "file($_), status(", strerror $retval, ") parts($count)\n";
501 }
502
503 SetOption OPT_SAVEPATH, "uudst/";
504
505 # now wade through all files and their source parts
506 $i = 0;
507 while ($uu = GetFileListItem($i)) {
508 $i++;
509 print "file nr. $i";
510 print " state ", $uu->state;
511 print " mode ", $uu->mode;
512 print " uudet ", strencoding $uu->uudet;
513 print " size ", $uu->size;
514 print " filename ", $uu->filename;
515 print " subfname ", $uu->subfname;
516 print " mimeid ", $uu->mimeid;
517 print " mimetype ", $uu->mimetype;
518 print "\n";
519
520 # print additional info about all parts
521 for ($uu->parts) {
522 while (my ($k, $v) = each %$_) {
523 print "$k > $v, ";
524 }
525 print "\n";
526 }
527
528 $uu->decode_temp;
529 print " temporarily decoded to ", $uu->binfile, "\n";
530 $uu->remove_temp;
531
532 print strerror $uu->decode;
533 print " saved as uudst/", $uu->filename, "\n";
534 }
535
536 print "cleanup...\n";
537
538 CleanUp();
539
540 =head1 AUTHOR
541
542 Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written
543 by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
544 bugfixed by Marc Lehmann.
545
546 =head1 SEE ALSO
547
548 perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/.
549
550 =cut