1 |
root |
1.1 |
package Convert::UUlib; |
2 |
|
|
|
3 |
root |
1.49 |
use common::sense; |
4 |
root |
1.28 |
|
5 |
root |
1.1 |
use Carp; |
6 |
|
|
|
7 |
|
|
require Exporter; |
8 |
|
|
require DynaLoader; |
9 |
|
|
|
10 |
root |
1.52 |
our $VERSION = 1.71; |
11 |
root |
1.1 |
|
12 |
root |
1.28 |
our @ISA = qw(Exporter DynaLoader); |
13 |
root |
1.1 |
|
14 |
root |
1.28 |
our @_consts = qw( |
15 |
root |
1.1 |
ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
16 |
|
|
|
17 |
|
|
FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA |
18 |
|
|
FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE |
19 |
|
|
|
20 |
|
|
MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING |
21 |
|
|
|
22 |
root |
1.25 |
OPT_RBUF OPT_WBUF |
23 |
root |
1.1 |
OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT |
24 |
|
|
OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB |
25 |
|
|
OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE |
26 |
root |
1.34 |
OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK |
27 |
root |
1.1 |
|
28 |
|
|
RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA |
29 |
|
|
RET_NOEND RET_NOMEM RET_OK RET_UNSUP |
30 |
|
|
|
31 |
root |
1.11 |
B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED |
32 |
root |
1.5 |
XX_ENCODED UU_ENCODED YENC_ENCODED |
33 |
root |
1.1 |
); |
34 |
|
|
|
35 |
root |
1.28 |
our @_funcs = qw( |
36 |
root |
1.6 |
Initialize CleanUp GetOption SetOption strerror SetMsgCallback |
37 |
|
|
SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback |
38 |
root |
1.47 |
FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp |
39 |
root |
1.6 |
RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti |
40 |
|
|
EncodePartial EncodeToStream EncodeToFile E_PrepSingle |
41 |
root |
1.35 |
E_PrepPartial |
42 |
root |
1.1 |
|
43 |
|
|
straction strencoding strmsglevel |
44 |
|
|
); |
45 |
|
|
|
46 |
root |
1.28 |
our @EXPORT = @_consts; |
47 |
|
|
our @EXPORT_OK = @_funcs; |
48 |
|
|
our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
49 |
root |
1.1 |
|
50 |
|
|
bootstrap Convert::UUlib $VERSION; |
51 |
|
|
|
52 |
root |
1.48 |
# No-op stub, kept only for compatibility with pre-1.7 versions.
# Accepts (and ignores) any arguments and returns nothing.
sub Initialize {
   return;
}
54 |
root |
1.1 |
|
55 |
|
|
# Map a numeric ACT_xxx action code to a short lowercase name.
# The code is compared numerically against each action constant in turn
# (copying, decoding, encoding, idle, scanning); the first match wins and
# anything else yields 'unknown'.
sub straction($) {
   my $code = $_[0];

   my $name =
        ($code == &ACT_COPYING)  ? 'copying'
      : ($code == &ACT_DECODING) ? 'decoding'
      : ($code == &ACT_ENCODING) ? 'encoding'
      : ($code == &ACT_IDLE)     ? 'idle'
      : ($code == &ACT_SCANNING) ? 'scanning'
      :                            'unknown';

   return $name;
}
64 |
|
|
|
65 |
|
|
# Map a numeric xxx_ENCODED encoding type to its name as a string.
# Constants are tried in a fixed order (UU, B64, YENC, BH, PT, QP, XX);
# the first numeric match wins and anything else yields 'unknown'.
sub strencoding($) {
   my $type = $_[0];

   my $name =
        ($type == &UU_ENCODED)   ? 'uuencode'
      : ($type == &B64_ENCODED)  ? 'base64'
      : ($type == &YENC_ENCODED) ? 'yenc'
      : ($type == &BH_ENCODED)   ? 'binhex'
      : ($type == &PT_ENCODED)   ? 'plaintext'
      : ($type == &QP_ENCODED)   ? 'quoted-printable'
      : ($type == &XX_ENCODED)   ? 'xxencode'
      :                            'unknown';

   return $name;
}
76 |
|
|
|
77 |
|
|
# Map a numeric MSG_xxx severity level to a short lowercase name.
# Levels are tried in a fixed order (message, note, warning, error, panic,
# fatal); the first numeric match wins and anything else yields 'unknown'.
sub strmsglevel($) {
   my $level = $_[0];

   my $name =
        ($level == &MSG_MESSAGE) ? 'message'
      : ($level == &MSG_NOTE)    ? 'note'
      : ($level == &MSG_WARNING) ? 'warning'
      : ($level == &MSG_ERROR)   ? 'error'
      : ($level == &MSG_PANIC)   ? 'panic'
      : ($level == &MSG_FATAL)   ? 'fatal'
      :                            'unknown';

   return $name;
}
86 |
|
|
|
87 |
|
|
1; |
88 |
|
|
__END__ |
89 |
|
|
|
90 |
|
|
=head1 NAME |
91 |
|
|
|
92 |
|
|
Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview). |
93 |
|
|
|
94 |
|
|
=head1 SYNOPSIS |
95 |
|
|
|
96 |
|
|
use Convert::UUlib ':all'; |
97 |
|
|
|
98 |
root |
1.11 |
# read all the files named on the commandline and decode them |
99 |
root |
1.13 |
# into the CURRENT directory. See below for a longer example. |
100 |
|
|
LoadFile $_ for @ARGV; |
101 |
root |
1.48 |
|
102 |
root |
1.47 |
for my $uu (GetFileList) { |
103 |
root |
1.13 |
if ($uu->state & FILE_OK) { |
104 |
|
|
$uu->decode; |
105 |
|
|
print $uu->filename, "\n"; |
106 |
|
|
} |
107 |
root |
1.1 |
} |
108 |
|
|
|
109 |
root |
1.11 |
=head1 DESCRIPTION |
110 |
root |
1.1 |
|
111 |
root |
1.11 |
Read the file doc/library.pdf from the distribution for in-depth |
112 |
|
|
information about the C-library used in this interface, and the rest of |
113 |
|
|
this document and especially the non-trivial decoder program at the end. |
114 |
|
|
|
115 |
|
|
=head1 EXPORTED CONSTANTS |
116 |
|
|
|
117 |
|
|
=head2 Action code constants |
118 |
|
|
|
119 |
|
|
ACT_IDLE we don't do anything |
120 |
|
|
ACT_SCANNING scanning an input file |
121 |
|
|
ACT_DECODING decoding into a temp file |
122 |
|
|
ACT_COPYING copying temp to target |
123 |
|
|
ACT_ENCODING encoding a file |
124 |
|
|
|
125 |
|
|
=head2 Message severity levels |
126 |
|
|
|
127 |
|
|
MSG_MESSAGE just a message, nothing important |
128 |
|
|
MSG_NOTE something that should be noticed |
129 |
|
|
MSG_WARNING important msg, processing continues |
130 |
|
|
MSG_ERROR processing has been terminated |
131 |
|
|
MSG_FATAL decoder cannot process further requests |
132 |
|
|
MSG_PANIC recovery impossible, app must terminate |
133 |
|
|
|
134 |
|
|
=head2 Options |
135 |
|
|
|
136 |
|
|
OPT_VERSION version number MAJOR.MINORplPATCH (ro) |
137 |
|
|
OPT_FAST assumes only one part per file |
138 |
|
|
OPT_DUMBNESS switch off the program's intelligence |
139 |
|
|
OPT_BRACKPOL give numbers in [] higher precedence |
140 |
|
|
OPT_VERBOSE generate informative messages |
141 |
|
|
OPT_DESPERATE try to decode incomplete files |
142 |
|
|
OPT_IGNREPLY ignore RE:plies (off by default) |
143 |
|
|
OPT_OVERWRITE whether it's OK to overwrite ex. files |
144 |
|
|
OPT_SAVEPATH prefix to save-files on disk |
145 |
|
|
OPT_IGNMODE ignore the original file mode |
146 |
|
|
OPT_DEBUG print messages with FILE/LINE info |
147 |
|
|
OPT_ERRNO get last error code for RET_IOERR (ro) |
148 |
|
|
OPT_PROGRESS retrieve progress information |
149 |
|
|
OPT_USETEXT handle text messages |
150 |
|
|
OPT_PREAMB handle Mime preambles/epilogues |
151 |
|
|
OPT_TINYB64 detect short B64 outside of Mime |
152 |
|
|
OPT_ENCEXT extension for single-part encoded files |
153 |
root |
1.14 |
OPT_REMOVE remove input files after decoding (dangerous) |
154 |
root |
1.11 |
OPT_MOREMIME strict MIME adherence |
155 |
root |
1.15 |
OPT_DOTDOT ".."-unescaping has not yet been done on input files |
156 |
root |
1.35 |
OPT_RBUF set default read I/O buffer size in bytes |
157 |
|
|
OPT_WBUF set default write I/O buffer size in bytes |
158 |
|
|
OPT_AUTOCHECK automatically check file list after every loadfile |
159 |
root |
1.11 |
|
160 |
|
|
=head2 Result/Error codes |
161 |
|
|
|
162 |
|
|
RET_OK everything went fine |
163 |
|
|
RET_IOERR I/O Error - examine errno |
164 |
|
|
RET_NOMEM not enough memory |
165 |
|
|
RET_ILLVAL illegal value for operation |
166 |
|
|
RET_NODATA decoder didn't find any data |
167 |
|
|
RET_NOEND encoded data wasn't ended properly |
168 |
|
|
RET_UNSUP unsupported function (encoding) |
169 |
|
|
RET_EXISTS file exists (decoding) |
170 |
|
|
RET_CONT continue -- special from ScanPart |
171 |
|
|
RET_CANCEL operation canceled |
172 |
|
|
|
173 |
|
|
=head2 File States |
174 |
|
|
|
175 |
|
|
This code is zero, i.e. "false": |
176 |
|
|
|
177 |
|
|
FILE_READ Read in, but not further processed |
178 |
|
|
|
179 |
root |
1.13 |
The following state codes are or'ed together: |
180 |
root |
1.11 |
|
181 |
|
|
FILE_MISPART Missing Part(s) detected |
182 |
|
|
FILE_NOBEGIN No 'begin' found |
183 |
|
|
FILE_NOEND No 'end' found |
184 |
|
|
FILE_NODATA File does not contain valid uudata |
185 |
|
|
FILE_OK All Parts found, ready to decode |
186 |
|
|
FILE_ERROR Error while decoding |
187 |
|
|
FILE_DECODED Successfully decoded |
188 |
|
|
FILE_TMPFILE Temporary decoded file exists |
189 |
|
|
|
190 |
|
|
=head2 Encoding types |
191 |
|
|
|
192 |
|
|
UU_ENCODED UUencoded data |
193 |
|
|
B64_ENCODED Mime-Base64 data |
194 |
|
|
XX_ENCODED XXencoded data |
195 |
|
|
BH_ENCODED Binhex encoded |
196 |
|
|
PT_ENCODED Plain-Text encoded (MIME) |
197 |
|
|
QP_ENCODED Quoted-Printable (MIME) |
198 |
|
|
YENC_ENCODED yEnc encoded (non-MIME) |
199 |
root |
1.1 |
|
200 |
root |
1.11 |
=head1 EXPORTED FUNCTIONS |
201 |
root |
1.1 |
|
202 |
root |
1.11 |
=head2 Initializing and cleanup |
203 |
root |
1.1 |
|
204 |
root |
1.11 |
Initialize is automatically called when the module is loaded and allocates |
205 |
root |
1.13 |
quite a small amount of memory for today's machines ;) CleanUp releases that |
206 |
|
|
again. |
207 |
root |
1.14 |
|
208 |
|
|
On my machine, a fairly complete decode with DBI backend needs about 10MB |
209 |
|
|
RSS to decode 20000 files. |
210 |
root |
1.1 |
|
211 |
root |
1.46 |
=over |
212 |
root |
1.13 |
|
213 |
|
|
=item CleanUp |
214 |
|
|
|
215 |
root |
1.48 |
Release memory, file items and clean up files. Should be called after a |
216 |
|
|
decoding run, if you want to start a new one. |
217 |
root |
1.13 |
|
218 |
|
|
=back |
219 |
root |
1.1 |
|
220 |
root |
1.11 |
=head2 Setting and querying options |
221 |
root |
1.1 |
|
222 |
root |
1.46 |
=over |
223 |
root |
1.13 |
|
224 |
|
|
=item $option = GetOption OPT_xxx |
225 |
|
|
|
226 |
|
|
=item SetOption OPT_xxx, opt-value |
227 |
|
|
|
228 |
|
|
=back |
229 |
|
|
|
230 |
|
|
See the C<OPT_xxx> constants above to see which options exist. |
231 |
root |
1.1 |
|
232 |
root |
1.11 |
=head2 Setting various callbacks |
233 |
root |
1.1 |
|
234 |
root |
1.46 |
=over |
235 |
root |
1.13 |
|
236 |
|
|
=item SetMsgCallback [callback-function] |
237 |
|
|
|
238 |
|
|
=item SetBusyCallback [callback-function] |
239 |
|
|
|
240 |
|
|
=item SetFileCallback [callback-function] |
241 |
|
|
|
242 |
|
|
=item SetFNameFilter [callback-function] |
243 |
|
|
|
244 |
|
|
=back |
245 |
root |
1.1 |
|
246 |
root |
1.11 |
=head2 Call the currently selected FNameFilter |
247 |
root |
1.1 |
|
248 |
root |
1.46 |
=over |
249 |
root |
1.13 |
|
250 |
|
|
=item $file = FNameFilter $file |
251 |
|
|
|
252 |
|
|
=back |
253 |
root |
1.1 |
|
254 |
root |
1.11 |
=head2 Loading sourcefiles, optionally fuzzy merge and start decoding |
255 |
root |
1.1 |
|
256 |
root |
1.46 |
=over |
257 |
root |
1.13 |
|
258 |
root |
1.16 |
=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] |
259 |
root |
1.13 |
|
260 |
|
|
Load the given file and scan it for encoded contents. Optionally tag it |
261 |
root |
1.16 |
with the given id, and if C<$delflag> is true, delete the file after it |
262 |
root |
1.18 |
is no longer necessary. If you are certain of the part number, you can |
263 |
|
|
specify it as the last argument. |
264 |
root |
1.16 |
|
265 |
|
|
A better (usually faster) way of doing this is using the C<SetFNameFilter> |
266 |
|
|
functionality. |
267 |
root |
1.13 |
|
268 |
|
|
=item $retval = Smerge $pass |
269 |
|
|
|
270 |
|
|
If you are desperate, try to call C<Smerge> with increasing C<$pass> |
271 |
|
|
values, beginning at C<0>, to try to merge parts that usually would not |
272 |
|
|
have been merged. |
273 |
|
|
|
274 |
|
|
Most probably this will result in garbled files, so never do this by |
275 |
root |
1.35 |
default, except: |
276 |
|
|
|
277 |
|
|
If the C<OPT_AUTOCHECK> option has been disabled (by default it is |
278 |
|
|
enabled) to speed up file loading, then you I<have> to call C<Smerge -1> |
279 |
|
|
after loading all files as an additional pre-pass (which is normally done |
280 |
|
|
by C<LoadFile>). |
281 |
root |
1.13 |
|
282 |
|
|
=item $item = GetFileListItem $item_number |
283 |
|
|
|
284 |
|
|
Return the C<$item> structure for the C<$item_number>'th found file, or |
285 |
|
|
C<undef> if no file with that number exists. |
286 |
|
|
|
287 |
|
|
The first file has number C<0>, and the series has no holes, so you can |
288 |
|
|
iterate over all files by starting with zero and incrementing until you |
289 |
|
|
hit C<undef>. |
290 |
|
|
|
291 |
root |
1.47 |
This function has to walk the linear list of files on each access, so |
292 |
|
|
if you want to iterate over all items, it is usually faster to use |
293 |
|
|
C<GetFileList>. |
294 |
|
|
|
295 |
|
|
=item @items = GetFileList |
296 |
|
|
|
297 |
|
|
Similar to C<GetFileListItem>, but returns all files in one go. |
298 |
|
|
|
299 |
root |
1.13 |
=back |
300 |
|
|
|
301 |
|
|
=head2 Decoding files |
302 |
|
|
|
303 |
root |
1.46 |
=over |
304 |
root |
1.13 |
|
305 |
root |
1.46 |
=item $retval = $item->rename ($newname) |
306 |
root |
1.13 |
|
307 |
|
|
Change the ondisk filename where the decoded file will be saved. |
308 |
|
|
|
309 |
|
|
=item $retval = $item->decode_temp |
310 |
|
|
|
311 |
|
|
Decode the file into a temporary location, use C<< $item->infile >> to |
312 |
|
|
retrieve the temporary filename. |
313 |
|
|
|
314 |
|
|
=item $retval = $item->remove_temp |
315 |
|
|
|
316 |
|
|
Remove the temporarily decoded file again. |
317 |
|
|
|
318 |
root |
1.46 |
=item $retval = $item->decode ([$target_path]) |
319 |
root |
1.13 |
|
320 |
root |
1.46 |
Decode the file to its destination, or the given target path. |
321 |
root |
1.13 |
|
322 |
root |
1.46 |
=item $retval = $item->info (callback-function) |
323 |
root |
1.13 |
|
324 |
|
|
=back |
325 |
root |
1.1 |
|
326 |
root |
1.11 |
=head2 Querying (and setting) item attributes |
327 |
root |
1.1 |
|
328 |
root |
1.46 |
=over |
329 |
root |
1.13 |
|
330 |
|
|
=item $state = $item->state |
331 |
|
|
|
332 |
root |
1.46 |
=item $mode = $item->mode ([newmode]) |
333 |
root |
1.13 |
|
334 |
|
|
=item $uudet = $item->uudet |
335 |
|
|
|
336 |
|
|
=item $size = $item->size |
337 |
|
|
|
338 |
root |
1.46 |
=item $filename = $item->filename ([newfilename]) |
339 |
root |
1.13 |
|
340 |
|
|
=item $subfname = $item->subfname |
341 |
|
|
|
342 |
|
|
=item $mimeid = $item->mimeid |
343 |
|
|
|
344 |
|
|
=item $mimetype = $item->mimetype |
345 |
|
|
|
346 |
|
|
=item $binfile = $item->binfile |
347 |
|
|
|
348 |
|
|
=back |
349 |
|
|
|
350 |
|
|
=head2 Information about source parts |
351 |
|
|
|
352 |
root |
1.46 |
=over |
353 |
root |
1.13 |
|
354 |
|
|
=item $parts = $item->parts |
355 |
|
|
|
356 |
|
|
Return information about all parts (source files) used to decode the file |
357 |
|
|
as a list of hashrefs with the following structure: |
358 |
|
|
|
359 |
|
|
{ |
360 |
|
|
partno => <integer describing the part number, starting with 1>, |
361 |
|
|
# the following members only exist when they contain useful information |
362 |
|
|
sfname => <local pathname of the file where this part is from>, |
363 |
|
|
filename => <the ondisk filename of the decoded file>, |
364 |
|
|
subfname => <used to cluster postings, possibly the posting filename>, |
365 |
|
|
subject => <the subject of the posting/mail>, |
366 |
|
|
origin => <the possible source (From) address>, |
367 |
|
|
mimetype => <the possible mimetype of the decoded file>, |
368 |
|
|
mimeid => <the id part of the Content-Type>, |
369 |
|
|
} |
370 |
root |
1.1 |
|
371 |
root |
1.13 |
Usually you are interested mostly in the C<sfname> and possibly the C<partno> |
372 |
|
|
and C<filename> members. |
373 |
root |
1.1 |
|
374 |
root |
1.13 |
=back |
375 |
root |
1.1 |
|
376 |
root |
1.46 |
=head2 Functions below are not documented and not very well tested - feedback welcome |
377 |
root |
1.1 |
|
378 |
root |
1.11 |
QuickDecode |
379 |
|
|
EncodeMulti |
380 |
|
|
EncodePartial |
381 |
|
|
EncodeToStream |
382 |
|
|
EncodeToFile |
383 |
|
|
E_PrepSingle |
384 |
|
|
E_PrepPartial |
385 |
root |
1.6 |
|
386 |
|
|
=head2 EXTENSION FUNCTIONS |
387 |
|
|
|
388 |
|
|
Functions found in this module but not documented in the uulib documentation: |
389 |
|
|
|
390 |
root |
1.46 |
=over |
391 |
root |
1.6 |
|
392 |
root |
1.11 |
=item $msg = straction ACT_xxx |
393 |
|
|
|
394 |
|
|
Return a human readable string representing the given action code. |
395 |
|
|
|
396 |
|
|
=item $msg = strerror RET_xxx |
397 |
|
|
|
398 |
|
|
Return a human readable string representing the given error code. |
399 |
|
|
|
400 |
|
|
=item $str = strencoding xxx_ENCODED |
401 |
|
|
|
402 |
|
|
Return the name of the encoding type as a string. |
403 |
|
|
|
404 |
|
|
=item $str = strmsglevel MSG_xxx |
405 |
|
|
|
406 |
|
|
Returns the message level as a string. |
407 |
|
|
|
408 |
root |
1.6 |
=item SetFileNameCallback $cb |
409 |
|
|
|
410 |
|
|
Sets (or queries) the FileNameCallback, which is called whenever the |
411 |
|
|
decoding library can't find a filename and wants to extract a filename |
412 |
|
|
from the subject line of a posting. The callback will be called with |
413 |
|
|
two arguments, the subject line and the current candidate for the |
414 |
|
|
filename. The latter argument can be C<undef>, which means that no |
415 |
|
|
filename could be found (and likely no one exists, so it is safe to also |
416 |
|
|
return C<undef> in this case). If it doesn't return anything (not even |
417 |
|
|
C<undef>!), then nothing happens, so this is a no-op callback: |
418 |
|
|
|
419 |
|
|
sub cb { |
420 |
|
|
return (); |
421 |
|
|
} |
422 |
|
|
|
423 |
|
|
If it returns C<undef>, then this indicates that no filename could be |
424 |
|
|
found. In all other cases, the return value is taken to be the filename. |
425 |
|
|
|
426 |
|
|
This is a slightly more useful callback: |
427 |
|
|
|
428 |
|
|
sub cb { |
429 |
|
|
return unless $_[1]; # skip "Re:"-plies et al. |
430 |
|
|
my ($subject, $filename) = @_; |
431 |
|
|
# if we find some *.rar, take it |
432 |
|
|
return $1 if $subject =~ /(\w+\.rar)/; |
433 |
|
|
# otherwise just pass what we have |
434 |
|
|
return (); |
435 |
|
|
} |
436 |
|
|
|
437 |
|
|
=back |
438 |
root |
1.1 |
|
439 |
root |
1.11 |
=head1 LARGE EXAMPLE DECODER |
440 |
|
|
|
441 |
root |
1.48 |
The general workflow for decoding is like this: |
442 |
|
|
|
443 |
|
|
=over |
444 |
|
|
|
445 |
|
|
=item 1. Configure options with C<SetOption> or C<SetXXXCallback>. |
446 |
|
|
|
447 |
|
|
=item 2. Load all source files with C<LoadFile>. |
448 |
|
|
|
449 |
|
|
=item 3. Optionally C<Smerge>. |
450 |
|
|
|
451 |
|
|
=item 4. Iterate over all C<GetFileList> items (i.e. result files). |
452 |
|
|
|
453 |
|
|
=item 5. C<CleanUp> to delete files and free items. |
454 |
|
|
|
455 |
|
|
=back |
456 |
|
|
|
457 |
|
|
What follows is the file C<example-decoder> from the distribution that |
458 |
|
|
illustrates the above workflow in a non-trivial example. |
459 |
root |
1.11 |
|
460 |
root |
1.32 |
#!/usr/bin/perl |
461 |
root |
1.11 |
|
462 |
root |
1.32 |
# decode all the files in the directory uusrc/ and copy |
463 |
|
|
# the resulting files to uudst/ |
464 |
root |
1.11 |
|
465 |
root |
1.32 |
use Convert::UUlib ':all'; |
466 |
root |
1.11 |
|
467 |
root |
1.32 |
sub namefilter { |
468 |
|
|
my ($path) = @_; |
469 |
root |
1.11 |
|
470 |
root |
1.32 |
$path=~s/^.*[\/\\]//; |
471 |
root |
1.11 |
|
472 |
root |
1.32 |
$path |
473 |
|
|
} |
474 |
root |
1.11 |
|
475 |
root |
1.32 |
sub busycb { |
476 |
|
|
my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; |
477 |
|
|
$_[0]=straction($action); |
478 |
|
|
print "busy_callback(", (join ",",@_), ")\n"; |
479 |
|
|
0 |
480 |
|
|
} |
481 |
root |
1.11 |
|
482 |
root |
1.32 |
SetOption OPT_RBUF, 128*1024; |
483 |
|
|
SetOption OPT_WBUF, 1024*1024; |
484 |
|
|
SetOption OPT_IGNMODE, 1; |
485 |
|
|
SetOption OPT_IGNMODE, 1; |
486 |
|
|
SetOption OPT_VERBOSE, 1; |
487 |
root |
1.53 |
SetOption OPT_AUTOCHECK, 0; |
488 |
root |
1.32 |
|
489 |
|
|
# show the three ways you can set callback functions. I normally |
490 |
|
|
# prefer the one with the sub inplace. |
491 |
|
|
SetFNameFilter \&namefilter; |
492 |
|
|
|
493 |
|
|
SetBusyCallback "busycb", 333; |
494 |
|
|
|
495 |
|
|
SetMsgCallback sub { |
496 |
|
|
my ($msg, $level) = @_; |
497 |
|
|
print uc strmsglevel $_[1], ": $msg\n"; |
498 |
|
|
}; |
499 |
|
|
|
500 |
|
|
# the following non-trivial FileNameCallback takes care |
501 |
|
|
# of some subject lines not detected properly by uulib: |
502 |
|
|
SetFileNameCallback sub { |
503 |
|
|
return unless $_[1]; # skip "Re:"-plies et al. |
504 |
|
|
local $_ = $_[0]; |
505 |
|
|
|
506 |
|
|
# the following rules are rather effective on some newsgroups, |
507 |
|
|
# like alt.binaries.games.anime, where non-mime, uuencoded data |
508 |
|
|
# is very common |
509 |
|
|
|
510 |
|
|
# if we find some *.rar, take it as the filename |
511 |
|
|
return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; |
512 |
|
|
|
513 |
|
|
# one common subject format |
514 |
|
|
return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; |
515 |
|
|
|
516 |
|
|
# - filename.par (04/55) |
517 |
|
|
return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; |
518 |
|
|
|
519 |
|
|
# - (xxx) No. 1 sayuri81.jpg 756565 bytes |
520 |
|
|
# - (20 files) No.17 Roseanne.jpg [2/2] |
521 |
|
|
return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; |
522 |
|
|
|
523 |
|
|
# try to detect some common forms of filenames |
524 |
|
|
return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i; |
525 |
|
|
|
526 |
|
|
# otherwise just pass what we have |
527 |
|
|
() |
528 |
|
|
}; |
529 |
|
|
|
530 |
|
|
# now read all files in the directory uusrc/* |
531 |
root |
1.47 |
for (<uusrc/*>) { |
532 |
root |
1.32 |
my ($retval, $count) = LoadFile ($_, $_, 1); |
533 |
|
|
print "file($_), status(", strerror $retval, ") parts($count)\n"; |
534 |
|
|
} |
535 |
root |
1.11 |
|
536 |
root |
1.53 |
Smerge -1; |
537 |
|
|
|
538 |
root |
1.32 |
SetOption OPT_SAVEPATH, "uudst/"; |
539 |
root |
1.11 |
|
540 |
root |
1.32 |
# now wade through all files and their source parts |
541 |
root |
1.47 |
for my $uu (GetFileList) { |
542 |
|
|
print "file ", $uu->filename, "\n"; |
543 |
|
|
print " state ", $uu->state, "\n"; |
544 |
|
|
print " mode ", $uu->mode, "\n"; |
545 |
|
|
print " uudet ", strencoding $uu->uudet, "\n"; |
546 |
|
|
print " size ", $uu->size, "\n"; |
547 |
|
|
print " subfname ", $uu->subfname, "\n"; |
548 |
|
|
print " mimeid ", $uu->mimeid, "\n"; |
549 |
|
|
print " mimetype ", $uu->mimetype, "\n"; |
550 |
root |
1.32 |
|
551 |
|
|
# print additional info about all parts |
552 |
root |
1.47 |
print " parts"; |
553 |
root |
1.32 |
for ($uu->parts) { |
554 |
root |
1.47 |
for my $k (sort keys %$_) { |
555 |
|
|
print " $k=$_->{$k}"; |
556 |
root |
1.32 |
} |
557 |
|
|
print "\n"; |
558 |
|
|
} |
559 |
|
|
|
560 |
|
|
$uu->remove_temp; |
561 |
|
|
|
562 |
root |
1.47 |
if (my $err = $uu->decode) { |
563 |
|
|
print " ERROR ", strerror $err, "\n"; |
564 |
root |
1.32 |
} else { |
565 |
root |
1.47 |
print " successfully saved as uudst/", $uu->filename, "\n"; |
566 |
root |
1.32 |
} |
567 |
|
|
} |
568 |
root |
1.11 |
|
569 |
root |
1.32 |
print "cleanup...\n"; |
570 |
root |
1.11 |
|
571 |
root |
1.32 |
CleanUp; |
572 |
root |
1.11 |
|
573 |
root |
1.46 |
=head1 PERLMULTICORE SUPPORT |
574 |
|
|
|
575 |
|
|
This module supports the perlmulticore standard (see |
576 |
|
|
L<http://perlmulticore.schmorp.de/> for more info) for the following |
577 |
|
|
functions - generally these are functions accessing the disk and/or using |
578 |
|
|
considerable CPU time: |
579 |
|
|
|
580 |
|
|
LoadFile |
581 |
|
|
$item->decode |
582 |
|
|
$item->decode_temp |
583 |
|
|
$item->remove_temp |
584 |
|
|
$item->info |
585 |
|
|
|
586 |
|
|
The perl interpreter will be reacquired/released on every callback |
587 |
|
|
invocation, so for performance reasons, callbacks should be avoided if |
588 |
|
|
that is costly. |
589 |
|
|
|
590 |
|
|
Future versions might enable multicore support for more functions. |
591 |
|
|
|
592 |
root |
1.45 |
=head1 BUGS AND LIMITATIONS |
593 |
|
|
|
594 |
|
|
The original uulib library this module uses was written at a time where |
595 |
|
|
main memory was measured in megabytes and buffer overflows as a security |
596 |
|
|
thing didn't exist. While a lot of security fixes have been applied over |
597 |
|
|
the years (including some defense in depth mechanism that can shield |
598 |
|
|
against a lot of as-of-yet undetected bugs), using this library for |
599 |
|
|
security purposes requires care. |
600 |
|
|
|
601 |
|
|
Likewise, file sizes when the uulib library was written were tiny compared |
602 |
|
|
to today, so do not expect this library to handle files larger than 2GB. |
603 |
|
|
|
604 |
root |
1.50 |
Lastly, this module uses a very "C-like" interface, which means it doesn't |
605 |
|
|
protect you from invalid pointers as you might expect from "more perlish" |
606 |
|
|
modules - for example, accessing a file item object after calling |
607 |
|
|
C<CleanUp> will likely result in crashes, memory corruption, or worse. |
608 |
|
|
|
609 |
root |
1.1 |
=head1 AUTHOR |
610 |
|
|
|
611 |
root |
1.23 |
Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
612 |
root |
1.11 |
by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
613 |
|
|
bugfixed by Marc Lehmann. |
614 |
root |
1.1 |
|
615 |
|
|
=head1 SEE ALSO |
616 |
|
|
|
617 |
root |
1.45 |
perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>. |
618 |
root |
1.1 |
|
619 |
|
|
=cut |
620 |
root |
1.45 |
|