ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/UUlib.pm
(Generate patch)

Comparing Convert-UUlib/UUlib.pm (file contents):
Revision 1.7 by root, Fri Apr 5 22:16:53 2002 UTC vs.
Revision 1.14 by root, Tue Oct 15 23:34:41 2002 UTC

2 2
3use Carp; 3use Carp;
4 4
5require Exporter; 5require Exporter;
6require DynaLoader; 6require DynaLoader;
7use AutoLoader;
8 7
9$VERSION = 0.211; 8$VERSION = 0.31;
10 9
11@ISA = qw(Exporter DynaLoader); 10@ISA = qw(Exporter DynaLoader);
12 11
13@_consts = qw( 12@_consts = qw(
14 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING 13 ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
19 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING 18 MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
20 19
21 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT 20 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
22 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB 21 OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
23 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE 22 OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
24 OPT_VERSION OPT_REMOVE OPT_MOREMIME 23 OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT
25 24
26 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA 25 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
27 RET_NOEND RET_NOMEM RET_OK RET_UNSUP 26 RET_NOEND RET_NOMEM RET_OK RET_UNSUP
28 27
29 B64ENCODED BH_ENCODED PT_ENCODED QP_ENCODED 28 B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
30 XX_ENCODED UU_ENCODED YENC_ENCODED 29 XX_ENCODED UU_ENCODED YENC_ENCODED
31); 30);
32 31
33@_funcs = qw( 32@_funcs = qw(
34 Initialize CleanUp GetOption SetOption strerror SetMsgCallback 33 Initialize CleanUp GetOption SetOption strerror SetMsgCallback
68} 67}
69 68
70# encoding type -> string mapping 69# encoding type -> string mapping
71sub strencoding($) { 70sub strencoding($) {
72 return 'uuencode' if $_[0] == &UU_ENCODED; 71 return 'uuencode' if $_[0] == &UU_ENCODED;
73 return 'base64' if $_[0] == &B64ENCODED; 72 return 'base64' if $_[0] == &B64_ENCODED;
74 return 'yenc' if $_[0] == &YENC_ENCODED; 73 return 'yenc' if $_[0] == &YENC_ENCODED;
75 return 'binhex' if $_[0] == &BH_ENCODED; 74 return 'binhex' if $_[0] == &BH_ENCODED;
76 return 'plaintext' if $_[0] == &PT_ENCODED; 75 return 'plaintext' if $_[0] == &PT_ENCODED;
77 return 'quoted-printable' if $_[0] == &QP_ENCODED; 76 return 'quoted-printable' if $_[0] == &QP_ENCODED;
78 return 'xxencode' if $_[0] == &XX_ENCODED; 77 return 'xxencode' if $_[0] == &XX_ENCODED;
96 95
97Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview). 96Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview).
98 97
99=head1 SYNOPSIS 98=head1 SYNOPSIS
100 99
101 use Convert::UUlib;
102
103=head1 DESCRIPTION
104
105Read the file uulibdoc.dvi.gz and the example-decoder source. Sorry - more
106to come once people use me ;)
107
108=head1 SMALL EXAMPLE DECODER
109
110The following code excerpt is a minimal decoder program. It reads all
111files given on the commandline and decodes any files in it.
112
113 use Convert::UUlib ':all'; 100 use Convert::UUlib ':all';
114 101
102 # read all the files named on the commandline and decode them
103 # into the CURRENT directory. See below for a longer example.
115 LoadFile($_) for @ARGV; 104 LoadFile $_ for @ARGV;
116
117 for($i=0; $uu=GetFileListItem($i); $i++) { 105 for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
118 $uu->decode if $uu->state & FILE_OK; 106 if ($uu->state & FILE_OK) {
107 $uu->decode;
108 print $uu->filename, "\n";
109 }
119 } 110 }
120 111
121=head1 LARGE EXAMPLE DECODER 112=head1 DESCRIPTION
122 113
123This is the file C<example-decoder> from the distribution, put here 114Read the file doc/library.pdf from the distribution for in-depth
124instead of more thorough documentation. 115information about the C-library used in this interface, and the rest of
116this document and especially the non-trivial decoder program at the end.
125 117
126 # decode all the files in the directory uusrc/ and copy 118=head1 EXPORTED CONSTANTS
127 # the resulting files to uudst/
128 119
129 use Convert::UUlib ':all'; 120=head2 Action code constants
130 121
131 sub namefilter { 122 ACT_IDLE we don't do anything
132 my($path)=@_; 123 ACT_SCANNING scanning an input file
133 $path=~s/^.*[\/\\]//; 124 ACT_DECODING decoding into a temp file
134 $path; 125 ACT_COPYING copying temp to target
126 ACT_ENCODING encoding a file
127
128=head2 Message severity levels
129
130 MSG_MESSAGE just a message, nothing important
131 MSG_NOTE something that should be noticed
132 MSG_WARNING important msg, processing continues
133 MSG_ERROR processing has been terminated
134 MSG_FATAL decoder cannot process further requests
135 MSG_PANIC recovery impossible, app must terminate
136
137=head2 Options
138
139 OPT_VERSION version number MAJOR.MINORplPATCH (ro)
140 OPT_FAST assumes only one part per file
141 OPT_DUMBNESS switch off the program's intelligence
142 OPT_BRACKPOL give numbers in [] higher precedence
143 OPT_VERBOSE generate informative messages
144 OPT_DESPERATE try to decode incomplete files
145 OPT_IGNREPLY ignore RE:plies (off by default)
146 OPT_OVERWRITE whether it's OK to overwrite ex. files
147 OPT_SAVEPATH prefix to save-files on disk
148 OPT_IGNMODE ignore the original file mode
149 OPT_DEBUG print messages with FILE/LINE info
150 OPT_ERRNO get last error code for RET_IOERR (ro)
151 OPT_PROGRESS retrieve progress information
152 OPT_USETEXT handle text messages
153 OPT_PREAMB handle Mime preambles/epilogues
154 OPT_TINYB64 detect short B64 outside of Mime
155 OPT_ENCEXT extension for single-part encoded files
156 OPT_REMOVE remove input files after decoding (dangerous)
157 OPT_MOREMIME strict MIME adherence
158 OPT_DOTDOT .. unescaping has not yet been done on input files
159
160=head2 Result/Error codes
161
162 RET_OK everything went fine
163 RET_IOERR I/O Error - examine errno
164 RET_NOMEM not enough memory
165 RET_ILLVAL illegal value for operation
166 RET_NODATA decoder didn't find any data
167 RET_NOEND encoded data wasn't ended properly
168 RET_UNSUP unsupported function (encoding)
169 RET_EXISTS file exists (decoding)
170 RET_CONT continue -- special from ScanPart
171 RET_CANCEL operation canceled
172
173=head2 File States
174
175 This code is zero, i.e. "false":
176
177 UUFILE_READ Read in, but not further processed
178
179 The following state codes are or'ed together:
180
181 FILE_MISPART Missing Part(s) detected
182 FILE_NOBEGIN No 'begin' found
183 FILE_NOEND No 'end' found
184 FILE_NODATA File does not contain valid uudata
185 FILE_OK All Parts found, ready to decode
186 FILE_ERROR Error while decoding
187 FILE_DECODED Successfully decoded
188 FILE_TMPFILE Temporary decoded file exists
189
190=head2 Encoding types
191
192 UU_ENCODED UUencoded data
193 B64_ENCODED Mime-Base64 data
194 XX_ENCODED XXencoded data
195 BH_ENCODED Binhex encoded
196 PT_ENCODED Plain-Text encoded (MIME)
197 QP_ENCODED Quoted-Printable (MIME)
198 YENC_ENCODED yEnc encoded (non-MIME)
199
200=head1 EXPORTED FUNCTIONS
201
202=head2 Initializing and cleanup
203
204Initialize is automatically called when the module is loaded and allocates
205quite a small amount of memory for today's machines ;) CleanUp releases that
206again.
207
208On my machine, a fairly complete decode with DBI backend needs about 10MB
209RSS to decode 20000 files.
210
211=over 4
212
213=item Initialize
214
215Not normally necessary, (re-)initializes the library.
216
217=item CleanUp
218
219Not normally necessary, could be called at the end to release memory
220before starting a new decoding round.
221
222=back
223
224=head2 Setting and querying options
225
226=over 4
227
228=item $option = GetOption OPT_xxx
229
230=item SetOption OPT_xxx, opt-value
231
232=back
233
234See the C<OPT_xxx> constants above to see which options exist.
235
236=head2 Setting various callbacks
237
238=over 4
239
240=item SetMsgCallback [callback-function]
241
242=item SetBusyCallback [callback-function]
243
244=item SetFileCallback [callback-function]
245
246=item SetFNameFilter [callback-function]
247
248=back
249
250=head2 Call the currently selected FNameFilter
251
252=over 4
253
254=item $file = FNameFilter $file
255
256=back
257
258=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
259
260=over 4
261
262=item ($retval, $count) = LoadFile $fname, [$id, [$delflag]]
263
264Load the given file and scan it for encoded contents. Optionally tag it
265with the given id, and if C<$delflag> is true, delete the file after it is
266no longer necessary.
267
268=item $retval = Smerge $pass
269
270If you are desperate, try to call C<Smerge> with increasing C<$pass>
271values, beginning at C<0>, to try to merge parts that usually would not
272have been merged.
273
274Most probably this will result in garbled files, so never do this by
275default.
276
277=item $item = GetFileListItem $item_number
278
279Return the C<$item> structure for the C<$item_number>'th found file, or
280C<undef> if no file with that number exists.
281
282The first file has number C<0>, and the series has no holes, so you can
283iterate over all files by starting with zero and incrementing until you
284hit C<undef>.
285
286=back
287
288=head2 Decoding files
289
290=over 4
291
292=item $retval = $item->rename($newname)
293
294Change the ondisk filename where the decoded file will be saved.
295
296=item $retval = $item->decode_temp
297
298Decode the file into a temporary location, use C<< $item->infile >> to
299retrieve the temporary filename.
300
301=item $retval = $item->remove_temp
302
303Remove the temporarily decoded file again.
304
305=item $retval = $item->decode([$target_path])
306
307Decode the file to its destination, or the given target path.
308
309=item $retval = $item->info(callback-function)
310
311=back
312
313=head2 Querying (and setting) item attributes
314
315=over 4
316
317=item $state = $item->state
318
319=item $mode = $item->mode([newmode])
320
321=item $uudet = $item->uudet
322
323=item $size = $item->size
324
325=item $filename = $item->filename([newfilename])
326
327=item $subfname = $item->subfname
328
329=item $mimeid = $item->mimeid
330
331=item $mimetype = $item->mimetype
332
333=item $binfile = $item->binfile
334
335=back
336
337=head2 Information about source parts
338
339=over 4
340
341=item $parts = $item->parts
342
343Return information about all parts (source files) used to decode the file
344as a list of hashrefs with the following structure:
345
346 {
347 partno => <integer describing the part number, starting with 1>,
348 # the following members only exist when they contain useful information
349 sfname => <local pathname of the file where this part is from>,
350 filename => <the ondisk filename of the decoded file>,
351 subfname => <used to cluster postings, possibly the posting filename>,
352 subject => <the subject of the posting/mail>,
353 origin => <the possible source (From) address>,
354 mimetype => <the possible mimetype of the decoded file>,
355 mimeid => <the id part of the Content-Type>,
135 } 356 }
136 357
137 sub busycb { 358Usually you are interested mostly in the C<sfname> and possibly the C<partno>
138 my($action,$curfile,$partno,$numparts,$percent,$fsize)=@_; 359and C<filename> members.
139 $_[0]=straction($action);
140 print "busy_callback(",join(",",@_),")\n";
141 0;
142 }
143 360
144 SetOption (OPT_IGNMODE, 1); 361=back
145 SetOption (OPT_VERBOSE, 1);
146 362
147 # show the three ways you can set callback functions
148 SetFNameFilter (\&namefilter);
149
150 SetBusyCallback ("busycb",333);
151
152 SetMsgCallback (sub {
153 my($msg,$level)=@_;
154 print uc(strmsglevel($_[1])),": $msg\n";
155 });
156
157 for(<uusrc/*>) {
158 my($retval,$count)=LoadFile ($_,$_,1);
159 print "file($_), status(",strerror($retval),") parts($count)\n";
160 }
161
162 SetOption (OPT_SAVEPATH, "uudst/");
163
164 $i=0;
165 while($uu=GetFileListItem($i)) {
166 $i++;
167 print "file nr. $i";
168 print " state ",$uu->state;
169 print " mode ",$uu->mode;
170 print " uudet ",strencoding($uu->uudet);
171 print " size ",$uu->size;
172 print " filename ",$uu->filename;
173 print " subfname ",$uu->subfname;
174 print " mimeid ",$uu->mimeid;
175 print " mimetype ",$uu->mimetype;
176 print "\n";
177
178 # print additional info about all parts
179 for($uu->parts) {
180 while(my($k,$v)=each(%$_)) {
181 print "$k > $v, ";
182 }
183 print "\n";
184 }
185
186 $uu->decode_temp;
187 print " temporarily decoded to ",$uu->binfile,"\n";
188 $uu->remove_temp;
189
190 print strerror($uu->decode);
191 print " saved as uudst/",$uu->filename,"\n";
192 }
193
194 print "cleanup...\n";
195
196 CleanUp();
197
198=head1 Exported constants
199
200Action code constants:
201
202 ACT_COPYING ACT_DECODING ACT_ENCODING
203 ACT_IDLE ACT_SCANNING
204
205File status flags:
206
207 FILE_DECODED FILE_ERROR FILE_MISPART
208 FILE_NOBEGIN FILE_NODATA FILE_NOEND
209 FILE_OK FILE_READ FILE_TMPFILE
210
211Message severity levels:
212
213 MSG_ERROR MSG_FATAL MSG_MESSAGE
214 MSG_NOTE MSG_PANIC MSG_WARNING
215
216Options:
217
218 OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS
219 OPT_ENCEXT OPT_ERRNO OPT_FAST OPT_IGNMODE
220 OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB OPT_PROGRESS
221 OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
222 OPT_VERSION OPT_REMOVE OPT_MOREMIME
223
224Error/Result codes:
225
226 RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR
227 RET_NODATA RET_NOEND RET_NOMEM RET_OK RET_UNSUP
228
229Encoding types:
230
231 B64ENCODED BH_ENCODED PT_ENCODED QP_ENCODED XX_ENCODED
232 UU_ENCODED YENC_ENCODED
233
234=head1 Exported functions
235
236Initializing and cleanup (Initialize is automatically called when the
237module is loaded and allocates quite a bit of memory. CleanUp releases
238that again).
239
240 Initialize; # not normally necessary
241 CleanUp; # could be called at the end to release memory
242
243Setting and querying options:
244
245 $option = GetOption OPT_xxx;
246 SetOption OPT_xxx, opt-value;
247
248Error and action values => stringified:
249
250 $msg = straction ACT_xxx;
251 $msg = strerror RET_xxx;
252
253Setting various callbacks:
254
255 SetMsgCallback [callback-function];
256 SetBusyCallback [callback-function];
257 SetFileCallback [callback-function];
258 SetFNameFilter [callback-function];
259
260Call the currently selected FNameFilter:
261
262 $file = FNameFilter $file;
263
264Loading sourcefiles, optionally fuzzy merge and start decoding:
265
266 ($retval, $count) = LoadFile $fname, [$id, [$delflag]];
267 $retval = Smerge $pass;
268 $item = GetFileListItem $item_number;
269
270The procedural interface is undocumented, use the following methods instead:
271
272 $retval = $item->rename($newname);
273 $retval = $item->decode_temp;
274 $retval = $item->remove_temp;
275 $retval = $item->decode([$target_path]);
276 $retval = $item->info(callback-function);
277
278Querying (and setting) item attributes:
279
280 $state = $item->state;
281 $mode = $item->mode([newmode]);
282 $uudet = $item->uudet;
283 $size = $item->size;
284 $filename = $item->filename([newfilename});
285 $subfname = $item->subfname;
286 $mimeid = $item->mimeid;
287 $mimetype = $item->mimetype;
288 $binfile = $item->binfile;
289
290Totally undocumented and unsupported(!):
291
292 $parts = $item->parts;
293
294Functions below not documented and not very well tested: 363=head2 Functions below not documented and not very well tested
295 364
296 int QuickDecode () ; 365 QuickDecode
297 int EncodeMulti () ; 366 EncodeMulti
298 int EncodePartial () ; 367 EncodePartial
299 int EncodeToStream () ; 368 EncodeToStream
300 int EncodeToFile () ; 369 EncodeToFile
301 int E_PrepSingle () ; 370 E_PrepSingle
302 int E_PrepPartial () ; 371 E_PrepPartial
303 372
304=head2 EXTENSION FUNCTIONS 373=head2 EXTENSION FUNCTIONS
305 374
306Functions found in this module but not documented in the uulib documentation: 375Functions found in this module but not documented in the uulib documentation:
307 376
308=over 4 377=over 4
378
379=item $msg = straction ACT_xxx
380
381Return a human readable string representing the given action code.
382
383=item $msg = strerror RET_xxx
384
385Return a human readable string representing the given error code.
386
387=item $str = strencoding xxx_ENCODED
388
389Return the name of the encoding type as a string.
390
391=item $str = strmsglevel MSG_xxx
392
393Returns the message level as a string.
309 394
310=item SetFileNameCallback $cb 395=item SetFileNameCallback $cb
311 396
312Sets (or queries) the FileNameCallback, which is called whenever the 397Sets (or queries) the FileNameCallback, which is called whenever the
313decoding library can't find a filename and wants to extract a filename 398decoding library can't find a filename and wants to extract a filename
336 return (); 421 return ();
337 } 422 }
338 423
339=back 424=back
340 425
426=head1 LARGE EXAMPLE DECODER
427
428This is the file C<example-decoder> from the distribution, put here
429instead of more thorough documentation.
430
431 # decode all the files in the directory uusrc/ and copy
432 # the resulting files to uudst/
433
434 use Convert::UUlib ':all';
435
436 sub namefilter {
437 my($path)=@_;
438 $path=~s/^.*[\/\\]//;
439 $path;
440 }
441
442 sub busycb {
443 my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
444 $_[0]=straction($action);
445 print "busy_callback(", (join ",",@_), ")\n";
446 0;
447 }
448
449 SetOption OPT_IGNMODE, 1;
450 SetOption OPT_VERBOSE, 1;
451
452 # show the three ways you can set callback functions. I normally
453 # prefer the one with the sub inplace.
454 SetFNameFilter \&namefilter;
455
456 SetBusyCallback "busycb", 333;
457
458 SetMsgCallback sub {
459 my ($msg, $level) = @_;
460 print uc strmsglevel $_[1], ": $msg\n";
461 };
462
463 # the following non-trivial FileNameCallback takes care
464 # of some subject lines not detected properly by uulib:
465 SetFileNameCallback sub {
466 return unless $_[1]; # skip "Re:"-plies et al.
467 local $_ = $_[0];
468
469 # the following rules are rather effective on some newsgroups,
470 # like alt.binaries.games.anime, where non-mime, uuencoded data
471 # is very common
472
473 # if we find some *.rar, take it as the filename
474 return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
475
476 # one common subject format
477 return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
478
479 # - filename.par (04/55)
480 return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
481
482 # - (xxx) No. 1 sayuri81.jpg 756565 bytes
483 # - (20 files) No.17 Roseanne.jpg [2/2]
484 return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
485
486 # otherwise just pass what we have
487 return ();
488 };
489
490 # now read all files in the directory uusrc/*
491 for(<uusrc/*>) {
492 my($retval,$count)=LoadFile ($_, $_, 1);
493 print "file($_), status(", strerror $retval, ") parts($count)\n";
494 }
495
496 SetOption OPT_SAVEPATH, "uudst/";
497
498 # now wade through all files and their source parts
499 $i = 0;
500 while ($uu = GetFileListItem($i)) {
501 $i++;
502 print "file nr. $i";
503 print " state ", $uu->state;
504 print " mode ", $uu->mode;
505 print " uudet ", strencoding $uu->uudet;
506 print " size ", $uu->size;
507 print " filename ", $uu->filename;
508 print " subfname ", $uu->subfname;
509 print " mimeid ", $uu->mimeid;
510 print " mimetype ", $uu->mimetype;
511 print "\n";
512
513 # print additional info about all parts
514 for ($uu->parts) {
515 while (my ($k, $v) = each %$_) {
516 print "$k > $v, ";
517 }
518 print "\n";
519 }
520
521 $uu->decode_temp;
522 print " temporarily decoded to ", $uu->binfile, "\n";
523 $uu->remove_temp;
524
525 print strerror $uu->decode;
526 print " saved as uudst/", $uu->filename, "\n";
527 }
528
529 print "cleanup...\n";
530
531 CleanUp();
532
341=head1 AUTHOR 533=head1 AUTHOR
342 534
343Marc Lehmann <pcg@goof.com>, the original uulib library was written by 535Marc Lehmann <pcg@goof.com>, the original uulib library was written
344Frank Pilhofer <fp@informatik.uni-frankfurt.de>. 536by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
537bugfixed by Marc Lehmann.
345 538
346=head1 SEE ALSO 539=head1 SEE ALSO
347 540
348perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. 541perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/.
349 542

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines