1 | package Convert::UUlib; |
1 | package Convert::UUlib; |
|
|
2 | |
|
|
3 | use common::sense; |
2 | |
4 | |
3 | use Carp; |
5 | use Carp; |
4 | |
6 | |
5 | require Exporter; |
7 | require Exporter; |
6 | require DynaLoader; |
8 | require DynaLoader; |
7 | use AutoLoader; |
|
|
8 | |
9 | |
9 | $VERSION = 0.21; |
10 | our $VERSION = 1.8; |
10 | |
11 | |
11 | @ISA = qw(Exporter DynaLoader); |
12 | our @ISA = qw(Exporter DynaLoader); |
12 | |
13 | |
13 | @_consts = qw( |
14 | our @_consts = qw( |
14 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
15 | ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING |
15 | |
16 | |
16 | FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA |
17 | FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA |
17 | FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE |
18 | FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE |
18 | |
19 | |
19 | MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING |
20 | MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING |
20 | |
21 | |
|
|
22 | OPT_RBUF OPT_WBUF |
21 | OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT |
23 | OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT |
22 | OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB |
24 | OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB |
23 | OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE |
25 | OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE |
24 | OPT_VERSION OPT_REMOVE OPT_MOREMIME |
26 | OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK |
25 | |
27 | |
26 | RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA |
28 | RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA |
27 | RET_NOEND RET_NOMEM RET_OK RET_UNSUP |
29 | RET_NOEND RET_NOMEM RET_OK RET_UNSUP |
28 | |
30 | |
29 | B64ENCODED BH_ENCODED PT_ENCODED QP_ENCODED |
31 | B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED |
30 | XX_ENCODED UU_ENCODED YENC_ENCODED |
32 | XX_ENCODED UU_ENCODED YENC_ENCODED |
31 | ); |
33 | ); |
32 | |
34 | |
33 | @_funcs = qw( |
35 | our @_funcs = qw( |
34 | Initialize CleanUp GetOption SetOption strerror |
36 | Initialize CleanUp GetOption SetOption strerror SetMsgCallback |
35 | SetMsgCallback SetBusyCallback SetFileCallback |
37 | SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback |
36 | SetFNameFilter FNameFilter LoadFile GetFileListItem |
38 | FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp |
37 | RenameFile DecodeToTemp RemoveTemp DecodeFile |
39 | RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti |
38 | InfoFile Smerge QuickDecode EncodeMulti EncodePartial |
40 | EncodePartial EncodeToStream EncodeToFile E_PrepSingle |
39 | EncodeToStream EncodeToFile E_PrepSingle E_PrepPartial |
41 | E_PrepPartial |
40 | |
42 | |
41 | straction strencoding strmsglevel |
43 | straction strencoding strmsglevel |
42 | ); |
44 | ); |
43 | |
45 | |
44 | @EXPORT = @_consts; |
46 | our @EXPORT = @_consts; |
45 | @EXPORT_OK = @_funcs; |
47 | our @EXPORT_OK = @_funcs; |
46 | %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
48 | our %EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts); |
47 | |
49 | |
48 | bootstrap Convert::UUlib $VERSION; |
50 | bootstrap Convert::UUlib $VERSION; |
49 | |
51 | |
50 | Initialize(); |
52 | # dummy function for compatibility with pre-1.7 versions |
51 | |
53 | sub Initialize { } |
52 | # not when < 5.005_6x |
|
|
53 | # END { CleanUp() } |
|
|
54 | |
|
|
55 | for (@_consts) { |
|
|
56 | my $constant = constant($_); |
|
|
57 | *$_ = sub () { $constant }; |
|
|
58 | } |
|
|
59 | |
54 | |
60 | # action code -> string mapping |
55 | # action code -> string mapping |
61 | sub straction($) { |
56 | sub straction($) { |
62 | return 'copying' if $_[0] == &ACT_COPYING; |
57 | return 'copying' if $_[0] == &ACT_COPYING; |
63 | return 'decoding' if $_[0] == &ACT_DECODING; |
58 | return 'decoding' if $_[0] == &ACT_DECODING; |
… | |
… | |
68 | } |
63 | } |
69 | |
64 | |
70 | # encoding type -> string mapping |
65 | # encoding type -> string mapping |
71 | sub strencoding($) { |
66 | sub strencoding($) { |
72 | return 'uuencode' if $_[0] == &UU_ENCODED; |
67 | return 'uuencode' if $_[0] == &UU_ENCODED; |
73 | return 'base64' if $_[0] == &B64ENCODED; |
68 | return 'base64' if $_[0] == &B64_ENCODED; |
74 | return 'yenc' if $_[0] == &YENC_ENCODED; |
69 | return 'yenc' if $_[0] == &YENC_ENCODED; |
75 | return 'binhex' if $_[0] == &BH_ENCODED; |
70 | return 'binhex' if $_[0] == &BH_ENCODED; |
76 | return 'plaintext' if $_[0] == &PT_ENCODED; |
71 | return 'plaintext' if $_[0] == &PT_ENCODED; |
77 | return 'quoted-printable' if $_[0] == &QP_ENCODED; |
72 | return 'quoted-printable' if $_[0] == &QP_ENCODED; |
78 | return 'xxencode' if $_[0] == &XX_ENCODED; |
73 | return 'xxencode' if $_[0] == &XX_ENCODED; |
… | |
… | |
92 | 1; |
87 | 1; |
93 | __END__ |
88 | __END__ |
94 | |
89 | |
95 | =head1 NAME |
90 | =head1 NAME |
96 | |
91 | |
97 | Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview). |
92 | Convert::UUlib - decode uu/xx/b64/mime/yenc/etc-encoded data from a massive number of files |
98 | |
93 | |
99 | =head1 SYNOPSIS |
94 | =head1 SYNOPSIS |
100 | |
|
|
101 | use Convert::UUlib; |
|
|
102 | |
|
|
103 | =head1 DESCRIPTION |
|
|
104 | |
|
|
105 | Read the file uulibdoc.dvi.gz and the example-decoder source. Sorry - more |
|
|
106 | to come once people use me ;) |
|
|
107 | |
|
|
108 | =head1 SMALL EXAMPLE DECODER |
|
|
109 | |
|
|
110 | The following code excerpt is a minimal decoder program. It reads all |
|
|
111 | files given on the commandline and decodes any files in it. |
|
|
112 | |
95 | |
113 | use Convert::UUlib ':all'; |
96 | use Convert::UUlib ':all'; |
114 | |
97 | |
|
|
98 | # read all the files named on the commandline and decode them |
|
|
99 | # into the CURRENT directory. See below for a longer example. |
115 | LoadFile($_) for @ARGV; |
100 | LoadFile $_ for @ARGV; |
116 | |
101 | |
117 | for($i=0; $uu=GetFileListItem($i); $i++) { |
102 | for my $uu (GetFileList) { |
118 | $uu->decode if $uu->state & FILE_OK; |
103 | if ($uu->state & FILE_OK) { |
|
|
104 | $uu->decode; |
|
|
105 | print $uu->filename, "\n"; |
|
|
106 | } |
119 | } |
107 | } |
120 | |
108 | |
|
|
109 | =head1 DESCRIPTION |
|
|
110 | |
|
|
111 | This module started as an interface to the uulib/uudeview library by Frank |
|
|
112 | Pilhofer that can be used to decode all kinds of usenet (and other) |
|
|
113 | binary messages. |
|
|
114 | |
|
|
115 | After upstream abandoned the project, the library was continuously bugfixed |
|
|
116 | and improved in this module, with major focuses on security fixes, |
|
|
117 | correctness and speed (that does not mean that this library is considered |
|
|
118 | safe with untrusted data, but it surely is safer than the original |
|
|
119 | uudeview). |
|
|
120 | |
|
|
121 | Read the file doc/library.pdf from the distribution for in-depth |
|
|
122 | information about the C-library used in this interface, and the rest of |
|
|
123 | this document and especially the non-trivial decoder program at the end. |
|
|
124 | |
|
|
125 | =head1 EXPORTED CONSTANTS |
|
|
126 | |
|
|
127 | =head2 Action code constants |
|
|
128 | |
|
|
129 | ACT_IDLE we don't do anything |
|
|
130 | ACT_SCANNING scanning an input file |
|
|
131 | ACT_DECODING decoding into a temp file |
|
|
132 | ACT_COPYING copying temp to target |
|
|
133 | ACT_ENCODING encoding a file |
|
|
134 | |
|
|
135 | =head2 Message severity levels |
|
|
136 | |
|
|
137 | MSG_MESSAGE just a message, nothing important |
|
|
138 | MSG_NOTE something that should be noticed |
|
|
139 | MSG_WARNING important msg, processing continues |
|
|
140 | MSG_ERROR processing has been terminated |
|
|
141 | MSG_FATAL decoder cannot process further requests |
|
|
142 | MSG_PANIC recovery impossible, app must terminate |
|
|
143 | |
|
|
144 | =head2 Options |
|
|
145 | |
|
|
146 | OPT_VERSION version number MAJOR.MINORplPATCH (ro) |
|
|
147 | OPT_FAST assumes only one part per file |
|
|
148 | OPT_DUMBNESS switch off the program's intelligence |
|
|
149 | OPT_BRACKPOL give numbers in [] higher precedence |
|
|
150 | OPT_VERBOSE generate informative messages |
|
|
151 | OPT_DESPERATE try to decode incomplete files |
|
|
152 | OPT_IGNREPLY ignore RE:plies (off by default) |
|
|
153 | OPT_OVERWRITE whether it's OK to overwrite ex. files |
|
|
154 | OPT_SAVEPATH prefix to save-files on disk |
|
|
155 | OPT_IGNMODE ignore the original file mode |
|
|
156 | OPT_DEBUG print messages with FILE/LINE info |
|
|
157 | OPT_ERRNO get last error code for RET_IOERR (ro) |
|
|
158 | OPT_PROGRESS retrieve progress information |
|
|
159 | OPT_USETEXT handle text messages |
|
|
160 | OPT_PREAMB handle Mime preambles/epilogues |
|
|
161 | OPT_TINYB64 detect short B64 outside of Mime |
|
|
162 | OPT_ENCEXT extension for single-part encoded files |
|
|
163 | OPT_REMOVE remove input files after decoding (dangerous) |
|
|
164 | OPT_MOREMIME strict MIME adherence |
|
|
165 | OPT_DOTDOT ".."-unescaping has not yet been done on input files |
|
|
166 | OPT_RBUF set default read I/O buffer size in bytes |
|
|
167 | OPT_WBUF set default write I/O buffer size in bytes |
|
|
168 | OPT_AUTOCHECK automatically check file list after every loadfile |
|
|
169 | |
|
|
170 | =head2 Result/Error codes |
|
|
171 | |
|
|
172 | RET_OK everything went fine |
|
|
173 | RET_IOERR I/O Error - examine errno |
|
|
174 | RET_NOMEM not enough memory |
|
|
175 | RET_ILLVAL illegal value for operation |
|
|
176 | RET_NODATA decoder didn't find any data |
|
|
177 | RET_NOEND encoded data wasn't ended properly |
|
|
178 | RET_UNSUP unsupported function (encoding) |
|
|
179 | RET_EXISTS file exists (decoding) |
|
|
180 | RET_CONT continue -- special from ScanPart |
|
|
181 | RET_CANCEL operation canceled |
|
|
182 | |
|
|
183 | =head2 File States |
|
|
184 | |
|
|
185 | This code is zero, i.e. "false": |
|
|
186 | |
|
|
187 | FILE_READ Read in, but not further processed |
|
|
188 | |
|
|
189 | The following state codes are or'ed together: |
|
|
190 | |
|
|
191 | FILE_MISPART Missing Part(s) detected |
|
|
192 | FILE_NOBEGIN No 'begin' found |
|
|
193 | FILE_NOEND No 'end' found |
|
|
194 | FILE_NODATA File does not contain valid uudata |
|
|
195 | FILE_OK All Parts found, ready to decode |
|
|
196 | FILE_ERROR Error while decoding |
|
|
197 | FILE_DECODED Successfully decoded |
|
|
198 | FILE_TMPFILE Temporary decoded file exists |
|
|
199 | |
|
|
200 | =head2 Encoding types |
|
|
201 | |
|
|
202 | UU_ENCODED UUencoded data |
|
|
203 | B64_ENCODED Mime-Base64 data |
|
|
204 | XX_ENCODED XXencoded data |
|
|
205 | BH_ENCODED Binhex encoded |
|
|
206 | PT_ENCODED Plain-Text encoded (MIME) |
|
|
207 | QP_ENCODED Quoted-Printable (MIME) |
|
|
208 | YENC_ENCODED yEnc encoded (non-MIME) |
|
|
209 | |
|
|
210 | =head1 EXPORTED FUNCTIONS |
|
|
211 | |
|
|
212 | =head2 Initializing and cleanup |
|
|
213 | |
|
|
214 | Initialize is automatically called when the module is loaded and allocates |
|
|
215 | quite a small amount of memory for today's machines ;) CleanUp releases that |
|
|
216 | again. |
|
|
217 | |
|
|
218 | On my machine, a fairly complete decode with DBI backend needs about 10MB |
|
|
219 | RSS to decode 20000 files. |
|
|
220 | |
|
|
221 | =over |
|
|
222 | |
|
|
223 | =item CleanUp |
|
|
224 | |
|
|
225 | Release memory, file items and clean up files. Should be called after a |
|
|
226 | decoding run, if you want to start a new one. |
|
|
227 | |
|
|
228 | =back |
|
|
229 | |
|
|
230 | =head2 Setting and querying options |
|
|
231 | |
|
|
232 | =over |
|
|
233 | |
|
|
234 | =item $option = GetOption OPT_xxx |
|
|
235 | |
|
|
236 | =item SetOption OPT_xxx, opt-value |
|
|
237 | |
|
|
238 | =back |
|
|
239 | |
|
|
240 | See the C<OPT_xxx> constants above to see which options exist. |
|
|
241 | |
|
|
242 | =head2 Setting various callbacks |
|
|
243 | |
|
|
244 | =over |
|
|
245 | |
|
|
246 | =item SetMsgCallback [callback-function] |
|
|
247 | |
|
|
248 | =item SetBusyCallback [callback-function] |
|
|
249 | |
|
|
250 | =item SetFileCallback [callback-function] |
|
|
251 | |
|
|
252 | =item SetFNameFilter [callback-function] |
|
|
253 | |
|
|
254 | =back |
|
|
255 | |
|
|
256 | =head2 Call the currently selected FNameFilter |
|
|
257 | |
|
|
258 | =over |
|
|
259 | |
|
|
260 | =item $file = FNameFilter $file |
|
|
261 | |
|
|
262 | =back |
|
|
263 | |
|
|
264 | =head2 Loading sourcefiles, optionally fuzzy merge and start decoding |
|
|
265 | |
|
|
266 | =over |
|
|
267 | |
|
|
268 | =item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] |
|
|
269 | |
|
|
270 | Load the given file and scan it for encoded contents. Optionally tag it |
|
|
271 | with the given id, and if C<$delflag> is true, delete the file after it |
|
|
272 | is no longer necessary. If you are certain of the part number, you can |
|
|
273 | specify it as the last argument. |
|
|
274 | |
|
|
275 | A better (usually faster) way of doing this is using the C<SetFNameFilter> |
|
|
276 | functionality. |
|
|
277 | |
|
|
278 | =item $retval = Smerge $pass |
|
|
279 | |
|
|
280 | If you are desperate, try to call C<Smerge> with increasing C<$pass> |
|
|
281 | values, beginning at C<0>, to try to merge parts that usually would not |
|
|
282 | have been merged. |
|
|
283 | |
|
|
284 | Most probably this will result in garbled files, so never do this by |
|
|
285 | default, except: |
|
|
286 | |
|
|
287 | If the C<OPT_AUTOCHECK> option has been disabled (by default it is |
|
|
288 | enabled) to speed up file loading, then you I<have> to call C<Smerge -1> |
|
|
289 | after loading all files as an additional pre-pass (which is normally done |
|
|
290 | by C<LoadFile>). |
|
|
291 | |
|
|
292 | =item $item = GetFileListItem $item_number |
|
|
293 | |
|
|
294 | Return the C<$item> structure for the C<$item_number>'th found file, or |
|
|
295 | C<undef> if no file with that number exists. |
|
|
296 | |
|
|
297 | The first file has number C<0>, and the series has no holes, so you can |
|
|
298 | iterate over all files by starting with zero and incrementing until you |
|
|
299 | hit C<undef>. |
|
|
300 | |
|
|
301 | This function has to walk the linear list of files on each access, so |
|
|
302 | if you want to iterate over all items, it is usually faster to use |
|
|
303 | C<GetFileList>. |
|
|
304 | |
|
|
305 | =item @items = GetFileList |
|
|
306 | |
|
|
307 | Similar to C<GetFileListItem>, but returns all files in one go, which is |
|
|
308 | very much faster for a large number of items, and has no drawbacks when used |
|
|
309 | for a small number of items. |
|
|
310 | |
|
|
311 | =back |
|
|
312 | |
|
|
313 | =head2 Decoding files |
|
|
314 | |
|
|
315 | =over |
|
|
316 | |
|
|
317 | =item $retval = $item->rename ($newname) |
|
|
318 | |
|
|
319 | Change the ondisk filename where the decoded file will be saved. |
|
|
320 | |
|
|
321 | =item $retval = $item->decode_temp |
|
|
322 | |
|
|
323 | Decode the file into a temporary location, use C<< $item->infile >> to |
|
|
324 | retrieve the temporary filename. |
|
|
325 | |
|
|
326 | =item $retval = $item->remove_temp |
|
|
327 | |
|
|
328 | Remove the temporarily decoded file again. |
|
|
329 | |
|
|
330 | =item $retval = $item->decode ([$target_path]) |
|
|
331 | |
|
|
332 | Decode the file to its destination, or the given target path. |
|
|
333 | |
|
|
334 | =item $retval = $item->info (callback-function) |
|
|
335 | |
|
|
336 | =back |
|
|
337 | |
|
|
338 | =head2 Querying (and setting) item attributes |
|
|
339 | |
|
|
340 | =over |
|
|
341 | |
|
|
342 | =item $state = $item->state |
|
|
343 | |
|
|
344 | =item $mode = $item->mode ([newmode]) |
|
|
345 | |
|
|
346 | =item $uudet = $item->uudet |
|
|
347 | |
|
|
348 | =item $size = $item->size |
|
|
349 | |
|
|
350 | =item $filename = $item->filename ([newfilename]) |
|
|
351 | |
|
|
352 | =item $subfname = $item->subfname |
|
|
353 | |
|
|
354 | =item $mimeid = $item->mimeid |
|
|
355 | |
|
|
356 | =item $mimetype = $item->mimetype |
|
|
357 | |
|
|
358 | =item $binfile = $item->binfile |
|
|
359 | |
|
|
360 | =back |
|
|
361 | |
|
|
362 | =head2 Information about source parts |
|
|
363 | |
|
|
364 | =over |
|
|
365 | |
|
|
366 | =item $parts = $item->parts |
|
|
367 | |
|
|
368 | Return information about all parts (source files) used to decode the file |
|
|
369 | as a list of hashrefs with the following structure: |
|
|
370 | |
|
|
371 | { |
|
|
372 | partno => <integer describing the part number, starting with 1>, |
|
|
373 | # the following members only exist when they contain useful information |
|
|
374 | sfname => <local pathname of the file where this part is from>, |
|
|
375 | filename => <the ondisk filename of the decoded file>, |
|
|
376 | subfname => <used to cluster postings, possibly the posting filename>, |
|
|
377 | subject => <the subject of the posting/mail>, |
|
|
378 | origin => <the possible source (From) address>, |
|
|
379 | mimetype => <the possible mimetype of the decoded file>, |
|
|
380 | mimeid => <the id part of the Content-Type>, |
|
|
381 | } |
|
|
382 | |
|
|
383 | Usually you are interested mostly in the C<sfname> and possibly the C<partno> |
|
|
384 | and C<filename> members. |
|
|
385 | |
|
|
386 | =back |
|
|
387 | |
|
|
388 | =head2 Functions below are not documented and not very well tested - feedback welcome |
|
|
389 | |
|
|
390 | QuickDecode |
|
|
391 | EncodeMulti |
|
|
392 | EncodePartial |
|
|
393 | EncodeToStream |
|
|
394 | EncodeToFile |
|
|
395 | E_PrepSingle |
|
|
396 | E_PrepPartial |
|
|
397 | |
|
|
398 | =head2 EXTENSION FUNCTIONS |
|
|
399 | |
|
|
400 | Functions found in this module but not documented in the uulib documentation: |
|
|
401 | |
|
|
402 | =over |
|
|
403 | |
|
|
404 | =item $msg = straction ACT_xxx |
|
|
405 | |
|
|
406 | Return a human readable string representing the given action code. |
|
|
407 | |
|
|
408 | =item $msg = strerror RET_xxx |
|
|
409 | |
|
|
410 | Return a human readable string representing the given error code. |
|
|
411 | |
|
|
412 | =item $str = strencoding xxx_ENCODED |
|
|
413 | |
|
|
414 | Return the name of the encoding type as a string. |
|
|
415 | |
|
|
416 | =item $str = strmsglevel MSG_xxx |
|
|
417 | |
|
|
418 | Returns the message level as a string. |
|
|
419 | |
|
|
420 | =item SetFileNameCallback $cb |
|
|
421 | |
|
|
422 | Sets (or queries) the FileNameCallback, which is called whenever the |
|
|
423 | decoding library can't find a filename and wants to extract a filename |
|
|
424 | from the subject line of a posting. The callback will be called with |
|
|
425 | two arguments, the subject line and the current candidate for the |
|
|
426 | filename. The latter argument can be C<undef>, which means that no |
|
|
427 | filename could be found (and likely no one exists, so it is safe to also |
|
|
428 | return C<undef> in this case). If it doesn't return anything (not even |
|
|
429 | C<undef>!), then nothing happens, so this is a no-op callback: |
|
|
430 | |
|
|
431 | sub cb { |
|
|
432 | return (); |
|
|
433 | } |
|
|
434 | |
|
|
435 | If it returns C<undef>, then this indicates that no filename could be |
|
|
436 | found. In all other cases, the return value is taken to be the filename. |
|
|
437 | |
|
|
438 | This is a slightly more useful callback: |
|
|
439 | |
|
|
440 | sub cb { |
|
|
441 | return unless $_[1]; # skip "Re:"-plies et al. |
|
|
442 | my ($subject, $filename) = @_; |
|
|
443 | # if we find some *.rar, take it |
|
|
444 | return $1 if $subject =~ /(\w+\.rar)/; |
|
|
445 | # otherwise just pass what we have |
|
|
446 | return (); |
|
|
447 | } |
|
|
448 | |
|
|
449 | =back |
|
|
450 | |
121 | =head1 LARGE EXAMPLE DECODER |
451 | =head1 LARGE EXAMPLE DECODER |
122 | |
452 | |
|
|
453 | The general workflow for decoding is like this: |
|
|
454 | |
|
|
455 | =over |
|
|
456 | |
|
|
457 | =item 1. Configure options with C<SetOption> or C<SetXXXCallback>. |
|
|
458 | |
|
|
459 | =item 2. Load all source files with C<LoadFile>. |
|
|
460 | |
|
|
461 | =item 3. Optionally C<Smerge>. |
|
|
462 | |
|
|
463 | =item 4. Iterate over all C<GetFileList> items (i.e. result files). |
|
|
464 | |
|
|
465 | =item 5. C<CleanUp> to delete files and free items. |
|
|
466 | |
|
|
467 | =back |
|
|
468 | |
123 | This is the file C<example-decoder> from the distribution, put here |
469 | What follows is the file C<example-decoder> from the distribution that |
124 | instead of more thorough documentation. |
470 | illustrates the above workflow in a non-trivial example. |
125 | |
471 | |
|
|
472 | #!/usr/bin/perl |
|
|
473 | |
126 | # decode all the files in the directory uusrc/ and copy |
474 | # decode all the files in the directory uusrc/ and copy |
127 | # the resulting files to uudst/ |
475 | # the resulting files to uudst/ |
128 | |
476 | |
129 | use Convert::UUlib ':all'; |
477 | use Convert::UUlib ':all'; |
130 | |
478 | |
131 | sub namefilter { |
479 | sub namefilter { |
132 | my($path)=@_; |
480 | my ($path) = @_; |
|
|
481 | |
133 | $path=~s/^.*[\/\\]//; |
482 | $path=~s/^.*[\/\\]//; |
|
|
483 | |
134 | $path; |
484 | $path |
135 | } |
485 | } |
136 | |
486 | |
137 | sub busycb { |
487 | sub busycb { |
138 | my($action,$curfile,$partno,$numparts,$percent,$fsize)=@_; |
488 | my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; |
139 | $_[0]=straction($action); |
489 | $_[0]=straction($action); |
140 | print "busy_callback(",join(",",@_),")\n"; |
490 | print "busy_callback(", (join ",",@_), ")\n"; |
141 | 0; |
491 | 0 |
142 | } |
492 | } |
143 | |
493 | |
|
|
494 | SetOption OPT_RBUF, 128*1024; |
|
|
495 | SetOption OPT_WBUF, 1024*1024; |
144 | SetOption (OPT_IGNMODE, 1); |
496 | SetOption OPT_IGNMODE, 1; |
|
|
497 | SetOption OPT_IGNMODE, 1; |
145 | SetOption (OPT_VERBOSE, 1); |
498 | SetOption OPT_VERBOSE, 1; |
|
|
499 | SetOption OPT_AUTOCHECK, 0; |
146 | |
500 | |
147 | # show the three ways you can set callback functions |
501 | # show the three ways you can set callback functions. I normally |
|
|
502 | # prefer the one with the sub inplace. |
148 | SetFNameFilter (\&namefilter); |
503 | SetFNameFilter \&namefilter; |
149 | |
504 | |
150 | SetBusyCallback ("busycb",333); |
505 | SetBusyCallback "busycb", 333; |
151 | |
506 | |
152 | SetMsgCallback (sub { |
507 | SetMsgCallback sub { |
153 | my($msg,$level)=@_; |
508 | my ($msg, $level) = @_; |
154 | print uc(strmsglevel($_[1])),": $msg\n"; |
509 | print uc strmsglevel $_[1], ": $msg\n"; |
155 | }); |
510 | }; |
156 | |
511 | |
|
|
512 | # the following non-trivial FileNameCallback takes care |
|
|
513 | # of some subject lines not detected properly by uulib: |
|
|
514 | SetFileNameCallback sub { |
|
|
515 | return unless $_[1]; # skip "Re:"-plies et al. |
|
|
516 | local $_ = $_[0]; |
|
|
517 | |
|
|
518 | # the following rules are rather effective on some newsgroups, |
|
|
519 | # like alt.binaries.games.anime, where non-mime, uuencoded data |
|
|
520 | # is very common |
|
|
521 | |
|
|
522 | # if we find some *.rar, take it as the filename |
|
|
523 | return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; |
|
|
524 | |
|
|
525 | # one common subject format |
|
|
526 | return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; |
|
|
527 | |
|
|
528 | # - filename.par (04/55) |
|
|
529 | return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; |
|
|
530 | |
|
|
531 | # - (xxx) No. 1 sayuri81.jpg 756565 bytes |
|
|
532 | # - (20 files) No.17 Roseanne.jpg [2/2] |
|
|
533 | return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; |
|
|
534 | |
|
|
535 | # try to detect some common forms of filenames |
|
|
536 | return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i; |
|
|
537 | |
|
|
538 | # otherwise just pass what we have |
|
|
539 | () |
|
|
540 | }; |
|
|
541 | |
|
|
542 | # now read all files in the directory uusrc/* |
157 | for(<uusrc/*>) { |
543 | for (<uusrc/*>) { |
158 | my($retval,$count)=LoadFile ($_,$_,1); |
544 | my ($retval, $count) = LoadFile ($_, $_, 1); |
159 | print "file($_), status(",strerror($retval),") parts($count)\n"; |
545 | print "file($_), status(", strerror $retval, ") parts($count)\n"; |
160 | } |
546 | } |
161 | |
547 | |
|
|
548 | Smerge -1; |
|
|
549 | |
162 | SetOption (OPT_SAVEPATH, "uudst/"); |
550 | SetOption OPT_SAVEPATH, "uudst/"; |
163 | |
551 | |
164 | $i=0; |
552 | # now wade through all files and their source parts |
165 | while($uu=GetFileListItem($i)) { |
553 | for my $uu (GetFileList) { |
166 | $i++; |
554 | print "file ", $uu->filename, "\n"; |
167 | print "file nr. $i"; |
|
|
168 | print " state ",$uu->state; |
555 | print " state ", $uu->state, "\n"; |
169 | print " mode ",$uu->mode; |
556 | print " mode ", $uu->mode, "\n"; |
170 | print " uudet ",strencoding($uu->uudet); |
557 | print " uudet ", strencoding $uu->uudet, "\n"; |
171 | print " size ",$uu->size; |
558 | print " size ", $uu->size, "\n"; |
172 | print " filename ",$uu->filename; |
|
|
173 | print " subfname ",$uu->subfname; |
559 | print " subfname ", $uu->subfname, "\n"; |
174 | print " mimeid ",$uu->mimeid; |
560 | print " mimeid ", $uu->mimeid, "\n"; |
175 | print " mimetype ",$uu->mimetype; |
561 | print " mimetype ", $uu->mimetype, "\n"; |
176 | print "\n"; |
|
|
177 | |
562 | |
178 | # print additional info about all parts |
563 | # print additional info about all parts |
|
|
564 | print " parts"; |
179 | for($uu->parts) { |
565 | for ($uu->parts) { |
180 | while(my($k,$v)=each(%$_)) { |
566 | for my $k (sort keys %$_) { |
181 | print "$k > $v, "; |
567 | print " $k=$_->{$k}"; |
182 | } |
568 | } |
183 | print "\n"; |
569 | print "\n"; |
184 | } |
570 | } |
185 | |
571 | |
186 | $uu->decode_temp; |
|
|
187 | print " temporarily decoded to ",$uu->binfile,"\n"; |
|
|
188 | $uu->remove_temp; |
572 | $uu->remove_temp; |
189 | |
573 | |
190 | print strerror($uu->decode); |
574 | if (my $err = $uu->decode) { |
|
|
575 | print " ERROR ", strerror $err, "\n"; |
|
|
576 | } else { |
191 | print " saved as uudst/",$uu->filename,"\n"; |
577 | print " successfully saved as uudst/", $uu->filename, "\n"; |
192 | } |
578 | } |
|
|
579 | } |
193 | |
580 | |
194 | print "cleanup...\n"; |
581 | print "cleanup...\n"; |
195 | |
582 | |
196 | CleanUp(); |
583 | CleanUp; |
197 | |
584 | |
198 | =head1 Exported constants |
585 | =head1 PERLMULTICORE SUPPORT |
199 | |
586 | |
200 | Action code constants: |
587 | This module supports the perlmulticore standard (see |
|
|
588 | L<http://perlmulticore.schmorp.de/> for more info) for the following |
|
|
589 | functions - generally these are functions accessing the disk and/or using |
|
|
590 | considerable CPU time: |
201 | |
591 | |
202 | ACT_COPYING ACT_DECODING ACT_ENCODING |
592 | LoadFile |
203 | ACT_IDLE ACT_SCANNING |
593 | $item->decode |
204 | |
|
|
205 | File status flags: |
|
|
206 | |
|
|
207 | FILE_DECODED FILE_ERROR FILE_MISPART |
|
|
208 | FILE_NOBEGIN FILE_NODATA FILE_NOEND |
|
|
209 | FILE_OK FILE_READ FILE_TMPFILE |
|
|
210 | |
|
|
211 | Message severity levels: |
|
|
212 | |
|
|
213 | MSG_ERROR MSG_FATAL MSG_MESSAGE |
|
|
214 | MSG_NOTE MSG_PANIC MSG_WARNING |
|
|
215 | |
|
|
216 | Options: |
|
|
217 | |
|
|
218 | OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS |
|
|
219 | OPT_ENCEXT OPT_ERRNO OPT_FAST OPT_IGNMODE |
|
|
220 | OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB OPT_PROGRESS |
|
|
221 | OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE |
|
|
222 | OPT_VERSION OPT_REMOVE OPT_MOREMIME |
|
|
223 | |
|
|
224 | Error/Result codes: |
|
|
225 | |
|
|
226 | RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR |
|
|
227 | RET_NODATA RET_NOEND RET_NOMEM RET_OK RET_UNSUP |
|
|
228 | |
|
|
229 | Encoding types: |
|
|
230 | |
|
|
231 | B64ENCODED BH_ENCODED PT_ENCODED |
|
|
232 | QP_ENCODED XX_ENCODED UU_ENCODED |
|
|
233 | |
|
|
234 | =head1 Exported functions |
|
|
235 | |
|
|
236 | Initializing and cleanup (Initialize is automatically called when the |
|
|
237 | module is loaded and allocates quite a bit of memory. CleanUp releases |
|
|
238 | that again). |
|
|
239 | |
|
|
240 | Initialize; # not normally necessary |
|
|
241 | CleanUp; # could be called at the end to release memory |
|
|
242 | |
|
|
243 | Setting and querying options: |
|
|
244 | |
|
|
245 | $option = GetOption OPT_xxx; |
|
|
246 | SetOption OPT_xxx, opt-value; |
|
|
247 | |
|
|
248 | Error and action values => stringified: |
|
|
249 | |
|
|
250 | $msg = straction ACT_xxx; |
|
|
251 | $msg = strerror RET_xxx; |
|
|
252 | |
|
|
253 | Setting various callbacks: |
|
|
254 | |
|
|
255 | SetMsgCallback [callback-function]; |
|
|
256 | SetBusyCallback [callback-function]; |
|
|
257 | SetFileCallback [callback-function]; |
|
|
258 | SetFNameFilter [callback-function]; |
|
|
259 | |
|
|
260 | Call the currently selected FNameFilter: |
|
|
261 | |
|
|
262 | $file = FNameFilter $file; |
|
|
263 | |
|
|
264 | Loading sourcefiles, optionally fuzzy merge and start decoding: |
|
|
265 | |
|
|
266 | ($retval, $count) = LoadFile $fname, [$id, [$delflag]]; |
|
|
267 | $retval = Smerge $pass; |
|
|
268 | $item = GetFileListItem $item_number; |
|
|
269 | |
|
|
270 | The procedural interface is undocumented, use the following methods instead: |
|
|
271 | |
|
|
272 | $retval = $item->rename($newname); |
|
|
273 | $retval = $item->decode_temp; |
594 | $item->decode_temp |
274 | $retval = $item->remove_temp; |
595 | $item->remove_temp |
275 | $retval = $item->decode([$target_path]); |
596 | $item->info |
276 | $retval = $item->info(callback-function); |
|
|
277 | |
597 | |
278 | Querying (and setting) item attributes: |
598 | The perl interpreter will be reacquired/released on every callback |
|
|
599 | invocation, so for performance reasons, callbacks should be avoided if |
|
|
600 | that is costly. |
279 | |
601 | |
280 | $state = $item->state; |
602 | Future versions might enable multicore support for more functions. |
281 | $mode = $item->mode([newmode]); |
|
|
282 | $uudet = $item->uudet; |
|
|
283 | $size = $item->size; |
|
|
284 | $filename = $item->filename([newfilename]); |
|
|
285 | $subfname = $item->subfname; |
|
|
286 | $mimeid = $item->mimeid; |
|
|
287 | $mimetype = $item->mimetype; |
|
|
288 | $binfile = $item->binfile; |
|
|
289 | |
603 | |
290 | Totally undocumented and unsupported(!): |
604 | =head1 BUGS AND LIMITATIONS |
291 | |
605 | |
292 | $parts = $item->parts; |
606 | The original uulib library this module uses was written at a time when |
|
|
607 | main memory was measured in megabytes and buffer overflows as a security |
|
|
608 | thing didn't exist. While a lot of security fixes have been applied over |
|
|
609 | the years (including some defense in depth mechanisms that can shield |
|
|
610 | against a lot of as-of-yet undetected bugs), using this library for |
|
|
611 | security purposes requires care. |
293 | |
612 | |
294 | Functions below not documented and not very well tested: |
613 | Likewise, file sizes when the uulib library was written were tiny compared |
|
|
614 | to today, so do not expect this library to handle files larger than 2GB, |
|
|
615 | certainly not on a 32 bit host. |
295 | |
616 | |
296 | int QuickDecode () ; |
617 | Lastly, this module uses a very "C-like" interface, which means it doesn't |
297 | int EncodeMulti () ; |
618 | protect you from invalid pointers as you might expect from "more perlish" |
298 | int EncodePartial () ; |
619 | modules - for example, accessing a file item object after calling |
299 | int EncodeToStream () ; |
620 | C<CleanUp> will likely result in crashes, memory corruption, or worse. |
300 | int EncodeToFile () ; |
|
|
301 | int E_PrepSingle () ; |
|
|
302 | int E_PrepPartial () ; |
|
|
303 | |
621 | |
304 | =head1 AUTHOR |
622 | =head1 AUTHOR |
305 | |
623 | |
306 | Marc Lehmann <pcg@goof.com>, the original uulib library was written by |
624 | Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
307 | Frank Pilhofer <fp@informatik.uni-frankfurt.de>. |
625 | by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
|
|
626 | bugfixed by Marc Lehmann. |
308 | |
627 | |
309 | =head1 SEE ALSO |
628 | =head1 SEE ALSO |
310 | |
629 | |
311 | perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/. |
630 | perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>. |
312 | |
631 | |
313 | =cut |
632 | =cut |
|
|
633 | |