ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Convert-UUlib/README
Revision: 1.1
Committed: Tue Mar 8 20:17:59 2005 UTC (19 years, 3 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 NAME
2     Convert::UUlib - Perl interface to the uulib library (a.k.a.
3     uudeview/uuenview).
4    
5     SYNOPSIS
6     use Convert::UUlib ':all';
7    
8     # read all the files named on the commandline and decode them
9     # into the CURRENT directory. See below for a longer example.
10     LoadFile $_ for @ARGV;
11     for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
12     if ($uu->state & FILE_OK) {
13     $uu->decode;
14     print $uu->filename, "\n";
15     }
16     }
17    
18     DESCRIPTION
19     Read the file doc/library.pdf from the distribution for in-depth
20     information about the C-library used in this interface, and the rest of
21     this document and especially the non-trivial decoder program at the end.
22    
23     EXPORTED CONSTANTS
24     Action code constants
25     ACT_IDLE we don't do anything
26     ACT_SCANNING scanning an input file
27     ACT_DECODING decoding into a temp file
28     ACT_COPYING copying temp to target
29     ACT_ENCODING encoding a file
30    
31     Message severity levels
32     MSG_MESSAGE just a message, nothing important
33     MSG_NOTE something that should be noticed
34     MSG_WARNING important msg, processing continues
35     MSG_ERROR processing has been terminated
36     MSG_FATAL decoder cannot process further requests
37     MSG_PANIC recovery impossible, app must terminate
38    
39     Options
40     OPT_VERSION version number MAJOR.MINORplPATCH (ro)
41     OPT_FAST assumes only one part per file
42     OPT_DUMBNESS switch off the program's intelligence
43     OPT_BRACKPOL give numbers in [] higher precedence
44     OPT_VERBOSE generate informative messages
45     OPT_DESPERATE try to decode incomplete files
46     OPT_IGNREPLY ignore RE:plies (off by default)
47     OPT_OVERWRITE whether it's OK to overwrite ex. files
48     OPT_SAVEPATH prefix to save-files on disk
49     OPT_IGNMODE ignore the original file mode
50     OPT_DEBUG print messages with FILE/LINE info
51     OPT_ERRNO get last error code for RET_IOERR (ro)
52     OPT_PROGRESS retrieve progress information
53     OPT_USETEXT handle text messages
54     OPT_PREAMB handle Mime preambles/epilogues
55     OPT_TINYB64 detect short B64 outside of Mime
56     OPT_ENCEXT extension for single-part encoded files
57     OPT_REMOVE remove input files after decoding (dangerous)
58     OPT_MOREMIME strict MIME adherence
59     OPT_DOTDOT ".."-unescaping has not yet been done on input files
60    
61     Result/Error codes
62     RET_OK everything went fine
63     RET_IOERR I/O Error - examine errno
64     RET_NOMEM not enough memory
65     RET_ILLVAL illegal value for operation
66     RET_NODATA decoder didn't find any data
67     RET_NOEND encoded data wasn't ended properly
68     RET_UNSUP unsupported function (encoding)
69     RET_EXISTS file exists (decoding)
70     RET_CONT continue -- special from ScanPart
71     RET_CANCEL operation canceled
72    
73     File States
74     This code is zero, i.e. "false":
75    
76     UUFILE_READ Read in, but not further processed
77    
78     The following state codes are or'ed together:
79    
80     FILE_MISPART Missing Part(s) detected
81     FILE_NOBEGIN No 'begin' found
82     FILE_NOEND No 'end' found
83     FILE_NODATA File does not contain valid uudata
84     FILE_OK All Parts found, ready to decode
85     FILE_ERROR Error while decoding
86     FILE_DECODED Successfully decoded
87     FILE_TMPFILE Temporary decoded file exists
88    
89     Encoding types
90     UU_ENCODED UUencoded data
91     B64_ENCODED Mime-Base64 data
92     XX_ENCODED XXencoded data
93     BH_ENCODED Binhex encoded
94     PT_ENCODED Plain-Text encoded (MIME)
95     QP_ENCODED Quoted-Printable (MIME)
96     YENC_ENCODED yEnc encoded (non-MIME)
97    
98     EXPORTED FUNCTIONS
99     Initializing and cleanup
100     Initialize is automatically called when the module is loaded and
101     allocates quite a small amount of memory for today's machines ;) CleanUp
102     releases that again.
103    
104     On my machine, a fairly complete decode with DBI backend needs about
105     10MB RSS to decode 20000 files.
106    
107     Initialize
108     Not normally necessary, (re-)initializes the library.
109    
110     CleanUp
111     Not normally necessary, could be called at the end to release memory
112     before starting a new decoding round.
113    
114     Setting and querying options
115     $option = GetOption OPT_xxx
116     SetOption OPT_xxx, opt-value
117    
118     See the "OPT_xxx" constants above to see which options exist.
119    
120     Setting various callbacks
121     SetMsgCallback [callback-function]
122     SetBusyCallback [callback-function]
123     SetFileCallback [callback-function]
124     SetFNameFilter [callback-function]
125    
126     Call the currently selected FNameFilter
127     $file = FNameFilter $file
128    
129     Loading sourcefiles, optionally fuzzy merge and start decoding
130     ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
131     Load the given file and scan it for encoded contents. Optionally tag
132     it with the given id, and if $delflag is true, delete the file after
133     it is no longer necessary. If you are certain of the part number,
134     you can specify it as the last argument.
135    
136     A better (usually faster) way of doing this is using the
137     "SetFNameFilter" functionality.
138    
139     $retval = Smerge $pass
140     If you are desperate, try to call "Smerge" with increasing $pass
141     values, beginning at 0, to try to merge parts that usually would not
142     have been merged.
143    
144     Most probably this will result in garbled files, so never do this by
145     default.
146    
147     $item = GetFileListItem $item_number
148     Return the $item structure for the $item_number'th found file, or
149     "undef" if no file with that number exists.
150    
151     The first file has number 0, and the series has no holes, so you can
152     iterate over all files by starting with zero and incrementing until
153     you hit "undef".
154    
155     Decoding files
156     $retval = $item->rename($newname)
157     Change the ondisk filename where the decoded file will be saved.
158    
159     $retval = $item->decode_temp
160     Decode the file into a temporary location, use "$item->infile" to
161     retrieve the temporary filename.
162    
163     $retval = $item->remove_temp
164     Remove the temporarily decoded file again.
165    
166     $retval = $item->decode([$target_path])
167     Decode the file to its destination, or the given target path.
168    
169     $retval = $item->info(callback-function)
170    
171     Querying (and setting) item attributes
172     $state = $item->state
173     $mode = $item->mode([newmode])
174     $uudet = $item->uudet
175     $size = $item->size
176     $filename = $item->filename([newfilename])
177     $subfname = $item->subfname
178     $mimeid = $item->mimeid
179     $mimetype = $item->mimetype
180     $binfile = $item->binfile
181    
182     Information about source parts
183     $parts = $item->parts
184     Return information about all parts (source files) used to decode the
185     file as a list of hashrefs with the following structure:
186    
187     {
188     partno => <integer describing the part number, starting with 1>,
189     # the following members only exist when they contain useful information
190     sfname => <local pathname of the file where this part is from>,
191     filename => <the ondisk filename of the decoded file>,
192     subfname => <used to cluster postings, possibly the posting filename>,
193     subject => <the subject of the posting/mail>,
194     origin => <the possible source (From) address>,
195     mimetype => <the possible mimetype of the decoded file>,
196     mimeid => <the id part of the Content-Type>,
197     }
198    
199     Usually you are mostly interested in the "sfname" and possibly the
200     "partno" and "filename" members.
201    
202     Functions below are not documented and not very well tested
203     QuickDecode
204     EncodeMulti
205     EncodePartial
206     EncodeToStream
207     EncodeToFile
208     E_PrepSingle
209     E_PrepPartial
210    
211     EXTENSION FUNCTIONS
212     Functions found in this module but not documented in the uulib
213     documentation:
214    
215     $msg = straction ACT_xxx
216     Return a human readable string representing the given action code.
217    
218     $msg = strerror RET_xxx
219     Return a human readable string representing the given error code.
220    
221     $str = strencoding xxx_ENCODED
222     Return the name of the encoding type as a string.
223    
224     $str = strmsglevel MSG_xxx
225     Returns the message level as a string.
226    
227     SetFileNameCallback $cb
228     Sets (or queries) the FileNameCallback, which is called whenever the
229     decoding library can't find a filename and wants to extract a
230     filename from the subject line of a posting. The callback will be
231     called with two arguments, the subject line and the current
232     candidate for the filename. The latter argument can be "undef",
233     which means that no filename could be found (and likely none
234     exists, so it is safe to also return "undef" in this case). If it
235     doesn't return anything (not even "undef"!), then nothing happens,
236     so this is a no-op callback:
237    
238     sub cb {
239     return ();
240     }
241    
242     If it returns "undef", then this indicates that no filename could be
243     found. In all other cases, the return value is taken to be the
244     filename.
245    
246     This is a slightly more useful callback:
247    
248     sub cb {
249     return unless $_[1]; # skip "Re:"-plies et al.
250     my ($subject, $filename) = @_;
251     # if we find some *.rar, take it
252     return $1 if $subject =~ /(\w+\.rar)/;
253     # otherwise just pass what we have
254     return ();
255     }
256    
257     LARGE EXAMPLE DECODER
258     This is the file "example-decoder" from the distribution, put here
259     instead of more thorough documentation.
260    
261     # decode all the files in the directory uusrc/ and copy
262     # the resulting files to uudst/
263    
264     use Convert::UUlib ':all';
265    
266     sub namefilter {
267     my($path)=@_;
268     $path=~s/^.*[\/\\]//;
269     $path;
270     }
271    
272     sub busycb {
273     my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
274     $_[0]=straction($action);
275     print "busy_callback(", (join ",",@_), ")\n";
276     0;
277     }
278    
279     SetOption OPT_IGNMODE, 1;
280     SetOption OPT_VERBOSE, 1;
281    
282     # show the three ways you can set callback functions. I normally
283     # prefer the one with the sub inplace.
284     SetFNameFilter \&namefilter;
285    
286     SetBusyCallback "busycb", 333;
287    
288     SetMsgCallback sub {
289     my ($msg, $level) = @_;
290     print uc strmsglevel $_[1], ": $msg\n";
291     };
292    
293     # the following non-trivial FileNameCallback takes care
294     # of some subject lines not detected properly by uulib:
295     SetFileNameCallback sub {
296     return unless $_[1]; # skip "Re:"-plies et al.
297     local $_ = $_[0];
298    
299     # the following rules are rather effective on some newsgroups,
300     # like alt.binaries.games.anime, where non-mime, uuencoded data
301     # is very common
302    
303     # if we find some *.rar, take it as the filename
304     return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
305    
306     # one common subject format
307     return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
308    
309     # - filename.par (04/55)
310     return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
311    
312     # - (xxx) No. 1 sayuri81.jpg 756565 bytes
313     # - (20 files) No.17 Roseanne.jpg [2/2]
314     return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
315    
316     # otherwise just pass what we have
317     return ();
318     };
319    
320     # now read all files in the directory uusrc/*
321     for(<uusrc/*>) {
322     my($retval,$count)=LoadFile ($_, $_, 1);
323     print "file($_), status(", strerror $retval, ") parts($count)\n";
324     }
325    
326     SetOption OPT_SAVEPATH, "uudst/";
327    
328     # now wade through all files and their source parts
329     $i = 0;
330     while ($uu = GetFileListItem($i)) {
331     $i++;
332     print "file nr. $i";
333     print " state ", $uu->state;
334     print " mode ", $uu->mode;
335     print " uudet ", strencoding $uu->uudet;
336     print " size ", $uu->size;
337     print " filename ", $uu->filename;
338     print " subfname ", $uu->subfname;
339     print " mimeid ", $uu->mimeid;
340     print " mimetype ", $uu->mimetype;
341     print "\n";
342    
343     # print additional info about all parts
344     for ($uu->parts) {
345     while (my ($k, $v) = each %$_) {
346     print "$k > $v, ";
347     }
348     print "\n";
349     }
350    
351     $uu->decode_temp;
352     print " temporarily decoded to ", $uu->binfile, "\n";
353     $uu->remove_temp;
354    
355     print strerror $uu->decode;
356     print " saved as uudst/", $uu->filename, "\n";
357     }
358    
359     print "cleanup...\n";
360    
361     CleanUp();
362    
363     AUTHOR
364     Marc Lehmann <schmorp@schmorp.de>, the original uulib library was
365     written by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later
366     heavily bugfixed by Marc Lehmann.
367    
368     SEE ALSO
369     perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/.
370