1 |
root |
1.1 |
NAME |
2 |
|
|
Convert::UUlib - Perl interface to the uulib library (a.k.a. |
3 |
|
|
uudeview/uuenview). |
4 |
|
|
|
5 |
|
|
SYNOPSIS |
6 |
|
|
use Convert::UUlib ':all'; |
7 |
root |
1.5 |
|
8 |
|
|
# read all the files named on the commandline and decode them |
9 |
root |
1.1 |
# into the CURRENT directory. See below for a longer example. |
10 |
|
|
LoadFile $_ for @ARGV; |
11 |
|
|
for (my $i = 0; my $uu = GetFileListItem $i; $i++) { |
12 |
|
|
if ($uu->state & FILE_OK) { |
13 |
|
|
$uu->decode; |
14 |
|
|
print $uu->filename, "\n"; |
15 |
|
|
} |
16 |
|
|
} |
17 |
|
|
|
18 |
|
|
DESCRIPTION |
19 |
|
|
Read the file doc/library.pdf from the distribution for in-depth |
20 |
|
|
information about the C-library used in this interface, and the rest of |
21 |
|
|
this document and especially the non-trivial decoder program at the end. |
22 |
|
|
|
23 |
|
|
EXPORTED CONSTANTS |
24 |
|
|
Action code constants |
25 |
|
|
ACT_IDLE we don't do anything |
26 |
|
|
ACT_SCANNING scanning an input file |
27 |
|
|
ACT_DECODING decoding into a temp file |
28 |
|
|
ACT_COPYING copying temp to target |
29 |
|
|
ACT_ENCODING encoding a file |
30 |
|
|
|
31 |
|
|
Message severity levels |
32 |
|
|
MSG_MESSAGE just a message, nothing important |
33 |
|
|
MSG_NOTE something that should be noticed |
34 |
|
|
MSG_WARNING important msg, processing continues |
35 |
|
|
MSG_ERROR processing has been terminated |
36 |
|
|
MSG_FATAL decoder cannot process further requests |
37 |
|
|
MSG_PANIC recovery impossible, app must terminate |
38 |
|
|
|
39 |
|
|
Options |
40 |
|
|
OPT_VERSION version number MAJOR.MINORplPATCH (ro) |
41 |
|
|
OPT_FAST assumes only one part per file |
42 |
|
|
OPT_DUMBNESS switch off the program's intelligence |
43 |
|
|
OPT_BRACKPOL give numbers in [] higher precedence |
44 |
|
|
OPT_VERBOSE generate informative messages |
45 |
|
|
OPT_DESPERATE try to decode incomplete files |
46 |
|
|
OPT_IGNREPLY ignore RE:plies (off by default) |
47 |
|
|
OPT_OVERWRITE whether it's OK to overwrite ex. files |
48 |
|
|
OPT_SAVEPATH prefix to save-files on disk |
49 |
|
|
OPT_IGNMODE ignore the original file mode |
50 |
|
|
OPT_DEBUG print messages with FILE/LINE info |
51 |
|
|
OPT_ERRNO get last error code for RET_IOERR (ro) |
52 |
|
|
OPT_PROGRESS retrieve progress information |
53 |
|
|
OPT_USETEXT handle text messages |
54 |
|
|
OPT_PREAMB handle Mime preambles/epilogues |
55 |
|
|
OPT_TINYB64 detect short B64 outside of Mime |
56 |
|
|
OPT_ENCEXT extension for single-part encoded files |
57 |
|
|
OPT_REMOVE remove input files after decoding (dangerous) |
58 |
|
|
OPT_MOREMIME strict MIME adherence |
59 |
|
|
OPT_DOTDOT ".."-unescaping has not yet been done on input files |
60 |
root |
1.5 |
OPT_RBUF set default read I/O buffer size in bytes |
61 |
|
|
OPT_WBUF set default write I/O buffer size in bytes |
62 |
|
|
OPT_AUTOCHECK automatically check file list after every loadfile |
63 |
root |
1.1 |
|
64 |
|
|
Result/Error codes |
65 |
|
|
RET_OK everything went fine |
66 |
|
|
RET_IOERR I/O Error - examine errno |
67 |
|
|
RET_NOMEM not enough memory |
68 |
|
|
RET_ILLVAL illegal value for operation |
69 |
|
|
RET_NODATA decoder didn't find any data |
70 |
|
|
RET_NOEND encoded data wasn't ended properly |
71 |
|
|
RET_UNSUP unsupported function (encoding) |
72 |
|
|
RET_EXISTS file exists (decoding) |
73 |
|
|
RET_CONT continue -- special from ScanPart |
74 |
|
|
RET_CANCEL operation canceled |
75 |
|
|
|
76 |
|
|
File States |
77 |
|
|
This code is zero, i.e. "false": |
78 |
|
|
|
79 |
|
|
UUFILE_READ Read in, but not further processed |
80 |
|
|
|
81 |
|
|
The following state codes are or'ed together: |
82 |
|
|
|
83 |
|
|
FILE_MISPART Missing Part(s) detected |
84 |
|
|
FILE_NOBEGIN No 'begin' found |
85 |
|
|
FILE_NOEND No 'end' found |
86 |
|
|
FILE_NODATA File does not contain valid uudata |
87 |
|
|
FILE_OK All Parts found, ready to decode |
88 |
|
|
FILE_ERROR Error while decoding |
89 |
|
|
FILE_DECODED Successfully decoded |
90 |
|
|
FILE_TMPFILE Temporary decoded file exists |
91 |
|
|
|
92 |
|
|
Encoding types |
93 |
|
|
UU_ENCODED UUencoded data |
94 |
|
|
B64_ENCODED Mime-Base64 data |
95 |
|
|
XX_ENCODED XXencoded data |
96 |
|
|
BH_ENCODED Binhex encoded |
97 |
|
|
PT_ENCODED Plain-Text encoded (MIME) |
98 |
|
|
QP_ENCODED Quoted-Printable (MIME) |
99 |
|
|
YENC_ENCODED yEnc encoded (non-MIME) |
100 |
|
|
|
101 |
|
|
EXPORTED FUNCTIONS |
102 |
|
|
Initializing and cleanup |
103 |
|
|
Initialize is automatically called when the module is loaded and |
104 |
|
|
allocates quite a small amount of memory for today's machines ;) CleanUp |
105 |
|
|
releases that again. |
106 |
|
|
|
107 |
|
|
On my machine, a fairly complete decode with DBI backend needs about |
108 |
|
|
10MB RSS to decode 20000 files. |
109 |
|
|
|
110 |
|
|
Initialize |
111 |
|
|
Not normally necessary, (re-)initializes the library. |
112 |
|
|
|
113 |
|
|
CleanUp |
114 |
|
|
Not normally necessary, could be called at the end to release memory |
115 |
|
|
before starting a new decoding round. |
116 |
|
|
|
117 |
|
|
Setting and querying options |
118 |
|
|
$option = GetOption OPT_xxx |
119 |
|
|
SetOption OPT_xxx, opt-value |
120 |
|
|
|
121 |
|
|
See the "OPT_xxx" constants above to see which options exist. |
122 |
|
|
|
123 |
|
|
Setting various callbacks |
124 |
|
|
SetMsgCallback [callback-function] |
125 |
|
|
SetBusyCallback [callback-function] |
126 |
|
|
SetFileCallback [callback-function] |
127 |
|
|
SetFNameFilter [callback-function] |
128 |
|
|
|
129 |
|
|
Call the currently selected FNameFilter |
130 |
|
|
$file = FNameFilter $file |
131 |
|
|
|
132 |
|
|
Loading sourcefiles, optionally fuzzy merge and start decoding |
133 |
|
|
($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] |
134 |
|
|
Load the given file and scan it for encoded contents. Optionally tag |
135 |
|
|
it with the given id, and if $delflag is true, delete the file after |
136 |
|
|
it is no longer necessary. If you are certain of the part number, |
137 |
|
|
you can specify it as the last argument. |
138 |
|
|
|
139 |
|
|
A better (usually faster) way of doing this is using the |
140 |
|
|
"SetFNameFilter" functionality. |
141 |
|
|
|
142 |
|
|
$retval = Smerge $pass |
143 |
|
|
If you are desperate, try to call "Smerge" with increasing $pass |
144 |
|
|
values, beginning at 0, to try to merge parts that usually would not |
145 |
|
|
have been merged. |
146 |
|
|
|
147 |
|
|
Most probably this will result in garbled files, so never do this by |
148 |
root |
1.5 |
default, except: |
149 |
|
|
|
150 |
|
|
If the "OPT_AUTOCHECK" option has been disabled (by default it is |
151 |
|
|
enabled) to speed up file loading, then you *have* to call "Smerge |
152 |
|
|
-1" after loading all files as an additional pre-pass (which is |
153 |
|
|
normally done by "LoadFile"). |
154 |
root |
1.1 |
|
155 |
|
|
$item = GetFileListItem $item_number |
156 |
|
|
Return the $item structure for the $item_number'th found file, or |
157 |
|
|
"undef" if no file with that number exists. |
158 |
|
|
|
159 |
|
|
The first file has number 0, and the series has no holes, so you can |
160 |
|
|
iterate over all files by starting with zero and incrementing until |
161 |
|
|
you hit "undef". |
162 |
|
|
|
163 |
|
|
Decoding files |
164 |
|
|
$retval = $item->rename($newname) |
165 |
|
|
Change the ondisk filename where the decoded file will be saved. |
166 |
|
|
|
167 |
|
|
$retval = $item->decode_temp |
168 |
|
|
Decode the file into a temporary location, use "$item->infile" to |
169 |
|
|
retrieve the temporary filename. |
170 |
|
|
|
171 |
|
|
$retval = $item->remove_temp |
172 |
|
|
Remove the temporarily decoded file again. |
173 |
|
|
|
174 |
|
|
$retval = $item->decode([$target_path]) |
175 |
|
|
Decode the file to its destination, or the given target path. |
176 |
|
|
|
177 |
|
|
$retval = $item->info(callback-function) |
178 |
|
|
|
179 |
|
|
Querying (and setting) item attributes |
180 |
|
|
$state = $item->state |
181 |
|
|
$mode = $item->mode([newmode]) |
182 |
|
|
$uudet = $item->uudet |
183 |
|
|
$size = $item->size |
184 |
|
|
$filename = $item->filename([newfilename]) |
185 |
|
|
$subfname = $item->subfname |
186 |
|
|
$mimeid = $item->mimeid |
187 |
|
|
$mimetype = $item->mimetype |
188 |
|
|
$binfile = $item->binfile |
189 |
|
|
|
190 |
|
|
Information about source parts |
191 |
|
|
$parts = $item->parts |
192 |
|
|
Return information about all parts (source files) used to decode the |
193 |
|
|
file as a list of hashrefs with the following structure: |
194 |
|
|
|
195 |
|
|
{ |
196 |
|
|
partno => <integer describing the part number, starting with 1>, |
197 |
|
|
# the following members only exist when they contain useful information |
198 |
|
|
sfname => <local pathname of the file where this part is from>, |
199 |
|
|
filename => <the ondisk filename of the decoded file>, |
200 |
|
|
subfname => <used to cluster postings, possibly the posting filename>, |
201 |
|
|
subject => <the subject of the posting/mail>, |
202 |
|
|
origin => <the possible source (From) address>, |
203 |
|
|
mimetype => <the possible mimetype of the decoded file>, |
204 |
|
|
mimeid => <the id part of the Content-Type>, |
205 |
|
|
} |
206 |
|
|
|
207 |
|
|
Usually you are interested mostly in the "sfname" and possibly the |
208 |
|
|
"partno" and "filename" members. |
209 |
|
|
|
210 |
|
|
Functions below are not documented and not very well tested |
211 |
|
|
QuickDecode |
212 |
|
|
EncodeMulti |
213 |
|
|
EncodePartial |
214 |
|
|
EncodeToStream |
215 |
|
|
EncodeToFile |
216 |
|
|
E_PrepSingle |
217 |
|
|
E_PrepPartial |
218 |
|
|
|
219 |
|
|
EXTENSION FUNCTIONS |
220 |
|
|
Functions found in this module but not documented in the uulib |
221 |
|
|
documentation: |
222 |
|
|
|
223 |
|
|
$msg = straction ACT_xxx |
224 |
|
|
Return a human readable string representing the given action code. |
225 |
|
|
|
226 |
|
|
$msg = strerror RET_xxx |
227 |
|
|
Return a human readable string representing the given error code. |
228 |
|
|
|
229 |
|
|
$str = strencoding xxx_ENCODED |
230 |
|
|
Return the name of the encoding type as a string. |
231 |
|
|
|
232 |
|
|
$str = strmsglevel MSG_xxx |
233 |
|
|
Returns the message level as a string. |
234 |
|
|
|
235 |
|
|
SetFileNameCallback $cb |
236 |
|
|
Sets (or queries) the FileNameCallback, which is called whenever the |
237 |
|
|
decoding library can't find a filename and wants to extract a |
238 |
|
|
filename from the subject line of a posting. The callback will be |
239 |
|
|
called with two arguments, the subject line and the current |
240 |
|
|
candidate for the filename. The latter argument can be "undef", |
241 |
|
|
which means that no filename could be found (and likely none |
242 |
|
|
exists, so it is safe to also return "undef" in this case). If it |
243 |
|
|
doesn't return anything (not even "undef"!), then nothing happens, |
244 |
|
|
so this is a no-op callback: |
245 |
|
|
|
246 |
|
|
sub cb { |
247 |
|
|
return (); |
248 |
|
|
} |
249 |
|
|
|
250 |
|
|
If it returns "undef", then this indicates that no filename could be |
251 |
|
|
found. In all other cases, the return value is taken to be the |
252 |
|
|
filename. |
253 |
|
|
|
254 |
|
|
This is a slightly more useful callback: |
255 |
|
|
|
256 |
|
|
sub cb { |
257 |
|
|
return unless $_[1]; # skip "Re:"-plies et al. |
258 |
|
|
my ($subject, $filename) = @_; |
259 |
|
|
# if we find some *.rar, take it |
260 |
|
|
return $1 if $subject =~ /(\w+\.rar)/; |
261 |
|
|
# otherwise just pass what we have |
262 |
|
|
return (); |
263 |
|
|
} |
264 |
|
|
|
265 |
|
|
LARGE EXAMPLE DECODER |
266 |
|
|
This is the file "example-decoder" from the distribution, put here |
267 |
|
|
instead of more thorough documentation. |
268 |
|
|
|
269 |
root |
1.4 |
#!/usr/bin/perl |
270 |
root |
1.1 |
|
271 |
root |
1.4 |
# decode all the files in the directory uusrc/ and copy |
272 |
|
|
# the resulting files to uudst/ |
273 |
root |
1.1 |
|
274 |
root |
1.4 |
use Convert::UUlib ':all'; |
275 |
root |
1.1 |
|
276 |
root |
1.4 |
sub namefilter { |
277 |
|
|
my ($path) = @_; |
278 |
|
|
|
279 |
|
|
$path=~s/^.*[\/\\]//; |
280 |
|
|
|
281 |
|
|
$path |
282 |
|
|
} |
283 |
|
|
|
284 |
|
|
sub busycb { |
285 |
|
|
my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; |
286 |
|
|
$_[0]=straction($action); |
287 |
|
|
print "busy_callback(", (join ",",@_), ")\n"; |
288 |
|
|
0 |
289 |
|
|
} |
290 |
|
|
|
291 |
|
|
SetOption OPT_RBUF, 128*1024; |
292 |
|
|
SetOption OPT_WBUF, 1024*1024; |
293 |
|
|
SetOption OPT_IGNMODE, 1; |
294 |
|
|
SetOption OPT_IGNMODE, 1; |
295 |
|
|
SetOption OPT_VERBOSE, 1; |
296 |
|
|
|
297 |
|
|
# show the three ways you can set callback functions. I normally |
298 |
|
|
# prefer the one with the sub inplace. |
299 |
|
|
SetFNameFilter \&namefilter; |
300 |
|
|
|
301 |
|
|
SetBusyCallback "busycb", 333; |
302 |
|
|
|
303 |
|
|
SetMsgCallback sub { |
304 |
|
|
my ($msg, $level) = @_; |
305 |
|
|
print uc strmsglevel $_[1], ": $msg\n"; |
306 |
|
|
}; |
307 |
|
|
|
308 |
|
|
# the following non-trivial FileNameCallback takes care |
309 |
|
|
# of some subject lines not detected properly by uulib: |
310 |
|
|
SetFileNameCallback sub { |
311 |
|
|
return unless $_[1]; # skip "Re:"-plies et al. |
312 |
|
|
local $_ = $_[0]; |
313 |
|
|
|
314 |
|
|
# the following rules are rather effective on some newsgroups, |
315 |
|
|
# like alt.binaries.games.anime, where non-mime, uuencoded data |
316 |
|
|
# is very common |
317 |
|
|
|
318 |
|
|
# if we find some *.rar, take it as the filename |
319 |
|
|
return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; |
320 |
|
|
|
321 |
|
|
# one common subject format |
322 |
|
|
return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; |
323 |
|
|
|
324 |
|
|
# - filename.par (04/55) |
325 |
|
|
return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; |
326 |
|
|
|
327 |
|
|
# - (xxx) No. 1 sayuri81.jpg 756565 bytes |
328 |
|
|
# - (20 files) No.17 Roseanne.jpg [2/2] |
329 |
|
|
return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; |
330 |
|
|
|
331 |
|
|
# try to detect some common forms of filenames |
332 |
|
|
return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i; |
333 |
|
|
|
334 |
|
|
# otherwise just pass what we have |
335 |
|
|
() |
336 |
|
|
}; |
337 |
|
|
|
338 |
|
|
# now read all files in the directory uusrc/* |
339 |
|
|
for(<uusrc/*>) { |
340 |
|
|
my ($retval, $count) = LoadFile ($_, $_, 1); |
341 |
|
|
print "file($_), status(", strerror $retval, ") parts($count)\n"; |
342 |
|
|
} |
343 |
|
|
|
344 |
|
|
SetOption OPT_SAVEPATH, "uudst/"; |
345 |
|
|
|
346 |
|
|
# now wade through all files and their source parts |
347 |
|
|
$i = 0; |
348 |
|
|
while ($uu = GetFileListItem $i) { |
349 |
|
|
$i++; |
350 |
|
|
print "file nr. $i"; |
351 |
|
|
print " state ", $uu->state; |
352 |
|
|
print " mode ", $uu->mode; |
353 |
|
|
print " uudet ", strencoding $uu->uudet; |
354 |
|
|
print " size ", $uu->size; |
355 |
|
|
print " filename ", $uu->filename; |
356 |
|
|
print " subfname ", $uu->subfname; |
357 |
|
|
print " mimeid ", $uu->mimeid; |
358 |
|
|
print " mimetype ", $uu->mimetype; |
359 |
|
|
print "\n"; |
360 |
|
|
|
361 |
|
|
# print additional info about all parts |
362 |
|
|
for ($uu->parts) { |
363 |
|
|
while (my ($k, $v) = each %$_) { |
364 |
|
|
print "$k > $v, "; |
365 |
|
|
} |
366 |
|
|
print "\n"; |
367 |
|
|
} |
368 |
|
|
|
369 |
|
|
print $uu->filename; |
370 |
|
|
|
371 |
|
|
$uu->remove_temp; |
372 |
|
|
|
373 |
|
|
if (my $err = $uu->decode ()) { |
374 |
|
|
print ", ", strerror $err, "\n"; |
375 |
|
|
} else { |
376 |
|
|
print ", saved as uudst/", $uu->filename, "\n"; |
377 |
|
|
} |
378 |
|
|
} |
379 |
root |
1.1 |
|
380 |
root |
1.4 |
print "cleanup...\n"; |
381 |
|
|
|
382 |
|
|
CleanUp; |
383 |
root |
1.1 |
|
384 |
|
|
AUTHOR |
385 |
|
|
Marc Lehmann <schmorp@schmorp.de>, the original uulib library was |
386 |
|
|
written by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later |
387 |
|
|
heavily bugfixed by Marc Lehmann. |
388 |
|
|
|
389 |
|
|
SEE ALSO |
390 |
|
|
perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. |
391 |
|
|
|