| 1 |
package Convert::UUlib; |
| 2 |
|
| 3 |
use common::sense; |
| 4 |
|
| 5 |
use Carp; |
| 6 |
|
| 7 |
require Exporter; |
| 8 |
require DynaLoader; |
| 9 |
|
| 10 |
# Module version; also passed to the bootstrap call further down in this file.
our $VERSION = 1.8; |
| 11 |
|
| 12 |
# Exporter provides the import mechanism, DynaLoader the bootstrap method.
our @ISA = qw(Exporter DynaLoader); |
| 13 |
|
| 14 |
# Names of all constants provided by the module, grouped by prefix.
# Every one of them is exported by default (see @EXPORT below).
our @_consts = (
    # ACT_xxx: action codes
    qw(ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING),

    # FILE_xxx: file states
    qw(FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA),
    qw(FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE),

    # MSG_xxx: message severity levels
    qw(MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING),

    # OPT_xxx: options for GetOption/SetOption
    qw(OPT_RBUF OPT_WBUF),
    qw(OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT),
    qw(OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB),
    qw(OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE),
    qw(OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT OPT_AUTOCHECK),

    # RET_xxx: result/error codes
    qw(RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA),
    qw(RET_NOEND RET_NOMEM RET_OK RET_UNSUP),

    # xxx_ENCODED: encoding types
    qw(B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED),
    qw(XX_ENCODED UU_ENCODED YENC_ENCODED),
);

# Names of all functions that can be imported on request (see @EXPORT_OK).
our @_funcs = (
    qw(Initialize CleanUp GetOption SetOption strerror SetMsgCallback),
    qw(SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback),
    qw(FNameFilter LoadFile GetFileListItem GetFileList RenameFile DecodeToTemp),
    qw(RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti),
    qw(EncodePartial EncodeToStream EncodeToFile E_PrepSingle),
    qw(E_PrepPartial),

    # string-mapping helpers
    qw(straction strencoding strmsglevel),
);

# Exporter configuration: constants by default, functions on request,
# plus the ":all" and ":constants" tags.
our @EXPORT      = @_consts;
our @EXPORT_OK   = @_funcs;
our %EXPORT_TAGS = (
    all       => [ @_consts, @_funcs ],
    constants => \@_consts,
);
| 49 |
|
| 50 |
# Invoke the bootstrap method (indirect-object syntax for
# Convert::UUlib->bootstrap($VERSION)); with DynaLoader in @ISA this is
# presumably what loads the compiled part of the module.
bootstrap Convert::UUlib $VERSION; |
| 51 |
|
| 52 |
# Dummy function kept for compatibility with pre-1.7 versions: the body is
# empty, so calling it has no effect.
| 53 |
sub Initialize { } |
| 54 |
|
| 55 |
# Map an action code (one of the ACT_xxx constants) to its lowercase name.
# Any value that matches none of the known codes yields 'unknown'.
sub straction($) {
    my ($action) = @_;

    # The &NAME call form needs no predeclared sub at compile time.
    if    ($action == &ACT_COPYING)  { return 'copying'  }
    elsif ($action == &ACT_DECODING) { return 'decoding' }
    elsif ($action == &ACT_ENCODING) { return 'encoding' }
    elsif ($action == &ACT_IDLE)     { return 'idle'     }
    elsif ($action == &ACT_SCANNING) { return 'scanning' }

    return 'unknown';
}
| 64 |
|
| 65 |
# Map an encoding type (one of the xxx_ENCODED constants) to its name.
# Any value that matches none of the known types yields 'unknown'.
sub strencoding($) {
    my ($encoding) = @_;

    # Checked in this fixed order; the first matching constant wins.
    $encoding == &UU_ENCODED   and return 'uuencode';
    $encoding == &B64_ENCODED  and return 'base64';
    $encoding == &YENC_ENCODED and return 'yenc';
    $encoding == &BH_ENCODED   and return 'binhex';
    $encoding == &PT_ENCODED   and return 'plaintext';
    $encoding == &QP_ENCODED   and return 'quoted-printable';
    $encoding == &XX_ENCODED   and return 'xxencode';

    return 'unknown';
}
| 76 |
|
| 77 |
# Map a message severity level (one of the MSG_xxx constants) to its
# lowercase name. Any value that matches no known level yields 'unknown'.
sub strmsglevel($) {
    my ($level) = @_;

    if    ($level == &MSG_MESSAGE) { return 'message' }
    elsif ($level == &MSG_NOTE)    { return 'note'    }
    elsif ($level == &MSG_WARNING) { return 'warning' }
    elsif ($level == &MSG_ERROR)   { return 'error'   }
    elsif ($level == &MSG_PANIC)   { return 'panic'   }
    elsif ($level == &MSG_FATAL)   { return 'fatal'   }

    return 'unknown';
}
| 86 |
|
| 87 |
1; |
| 88 |
__END__ |
| 89 |
|
| 90 |
=head1 NAME |
| 91 |
|
| 92 |
Convert::UUlib - decode uu/xx/b64/mime/yenc/etc-encoded data from a massive number of files |
| 93 |
|
| 94 |
=head1 SYNOPSIS |
| 95 |
|
| 96 |
use Convert::UUlib ':all'; |
| 97 |
|
| 98 |
# read all the files named on the commandline and decode them |
| 99 |
# into the CURRENT directory. See below for a longer example. |
| 100 |
LoadFile $_ for @ARGV; |
| 101 |
|
| 102 |
for my $uu (GetFileList) { |
| 103 |
if ($uu->state & FILE_OK) { |
| 104 |
$uu->decode; |
| 105 |
print $uu->filename, "\n"; |
| 106 |
} |
| 107 |
} |
| 108 |
|
| 109 |
=head1 DESCRIPTION |
| 110 |
|
| 111 |
This module started as an interface to the uulib/uudeview library by Frank |
| 112 |
Pilhofer that can be used to decode all kinds of usenet (and other) |
| 113 |
binary messages. |
| 114 |
|
| 115 |
After upstream abandoned the project, the library was continuously bugfixed |
| 116 |
and improved in this module, with major focuses on security fixes, |
| 117 |
correctness and speed (that does not mean that this library is considered |
| 118 |
safe with untrusted data, but it surely is safer than the original |
| 119 |
uudeview). |
| 120 |
|
| 121 |
Read the file doc/library.pdf from the distribution for in-depth |
| 122 |
information about the C-library used in this interface, and the rest of |
| 123 |
this document and especially the non-trivial decoder program at the end. |
| 124 |
|
| 125 |
=head1 EXPORTED CONSTANTS |
| 126 |
|
| 127 |
=head2 Action code constants |
| 128 |
|
| 129 |
ACT_IDLE we don't do anything |
| 130 |
ACT_SCANNING scanning an input file |
| 131 |
ACT_DECODING decoding into a temp file |
| 132 |
ACT_COPYING copying temp to target |
| 133 |
ACT_ENCODING encoding a file |
| 134 |
|
| 135 |
=head2 Message severity levels |
| 136 |
|
| 137 |
MSG_MESSAGE just a message, nothing important |
| 138 |
MSG_NOTE something that should be noticed |
| 139 |
MSG_WARNING important msg, processing continues |
| 140 |
MSG_ERROR processing has been terminated |
| 141 |
MSG_FATAL decoder cannot process further requests |
| 142 |
MSG_PANIC recovery impossible, app must terminate |
| 143 |
|
| 144 |
=head2 Options |
| 145 |
|
| 146 |
OPT_VERSION version number MAJOR.MINORplPATCH (ro) |
| 147 |
OPT_FAST assumes only one part per file |
| 148 |
OPT_DUMBNESS switch off the program's intelligence |
| 149 |
OPT_BRACKPOL give numbers in [] higher precedence |
| 150 |
OPT_VERBOSE generate informative messages |
| 151 |
OPT_DESPERATE try to decode incomplete files |
| 152 |
OPT_IGNREPLY ignore RE:plies (off by default) |
| 153 |
OPT_OVERWRITE whether it's OK to overwrite ex. files |
| 154 |
OPT_SAVEPATH prefix to save-files on disk |
| 155 |
OPT_IGNMODE ignore the original file mode |
| 156 |
OPT_DEBUG print messages with FILE/LINE info |
| 157 |
OPT_ERRNO get last error code for RET_IOERR (ro) |
| 158 |
OPT_PROGRESS retrieve progress information |
| 159 |
OPT_USETEXT handle text messages |
| 160 |
OPT_PREAMB handle Mime preambles/epilogues |
| 161 |
OPT_TINYB64 detect short B64 outside of Mime |
| 162 |
OPT_ENCEXT extension for single-part encoded files |
| 163 |
OPT_REMOVE remove input files after decoding (dangerous) |
| 164 |
OPT_MOREMIME strict MIME adherence |
| 165 |
OPT_DOTDOT ".."-unescaping has not yet been done on input files |
| 166 |
OPT_RBUF set default read I/O buffer size in bytes |
| 167 |
OPT_WBUF set default write I/O buffer size in bytes |
| 168 |
OPT_AUTOCHECK automatically check file list after every loadfile |
| 169 |
|
| 170 |
=head2 Result/Error codes |
| 171 |
|
| 172 |
RET_OK everything went fine |
| 173 |
RET_IOERR I/O Error - examine errno |
| 174 |
RET_NOMEM not enough memory |
| 175 |
RET_ILLVAL illegal value for operation |
| 176 |
RET_NODATA decoder didn't find any data |
| 177 |
RET_NOEND encoded data wasn't ended properly |
| 178 |
RET_UNSUP unsupported function (encoding) |
| 179 |
RET_EXISTS file exists (decoding) |
| 180 |
RET_CONT continue -- special from ScanPart |
| 181 |
RET_CANCEL operation canceled |
| 182 |
|
| 183 |
=head2 File States |
| 184 |
|
| 185 |
This code is zero, i.e. "false": |
| 186 |
|
| 187 |
FILE_READ Read in, but not further processed |
| 188 |
|
| 189 |
The following state codes are or'ed together: |
| 190 |
|
| 191 |
FILE_MISPART Missing Part(s) detected |
| 192 |
FILE_NOBEGIN No 'begin' found |
| 193 |
FILE_NOEND No 'end' found |
| 194 |
FILE_NODATA File does not contain valid uudata |
| 195 |
FILE_OK All Parts found, ready to decode |
| 196 |
FILE_ERROR Error while decoding |
| 197 |
FILE_DECODED Successfully decoded |
| 198 |
FILE_TMPFILE Temporary decoded file exists |
| 199 |
|
| 200 |
=head2 Encoding types |
| 201 |
|
| 202 |
UU_ENCODED UUencoded data |
| 203 |
B64_ENCODED Mime-Base64 data |
| 204 |
XX_ENCODED XXencoded data |
| 205 |
BH_ENCODED Binhex encoded |
| 206 |
PT_ENCODED Plain-Text encoded (MIME) |
| 207 |
QP_ENCODED Quoted-Printable (MIME) |
| 208 |
YENC_ENCODED yEnc encoded (non-MIME) |
| 209 |
|
| 210 |
=head1 EXPORTED FUNCTIONS |
| 211 |
|
| 212 |
=head2 Initializing and cleanup |
| 213 |
|
| 214 |
Initialize is automatically called when the module is loaded and allocates |
| 215 |
quite a small amount of memory for today's machines ;) CleanUp releases that |
| 216 |
again. |
| 217 |
|
| 218 |
On my machine, a fairly complete decode with DBI backend needs about 10MB |
| 219 |
RSS to decode 20000 files. |
| 220 |
|
| 221 |
=over |
| 222 |
|
| 223 |
=item CleanUp |
| 224 |
|
| 225 |
Release memory, file items and clean up files. Should be called after a |
| 226 |
decoding run, if you want to start a new one. |
| 227 |
|
| 228 |
=back |
| 229 |
|
| 230 |
=head2 Setting and querying options |
| 231 |
|
| 232 |
=over |
| 233 |
|
| 234 |
=item $option = GetOption OPT_xxx |
| 235 |
|
| 236 |
=item SetOption OPT_xxx, opt-value |
| 237 |
|
| 238 |
=back |
| 239 |
|
| 240 |
See the C<OPT_xxx> constants above to see which options exist. |
| 241 |
|
| 242 |
=head2 Setting various callbacks |
| 243 |
|
| 244 |
=over |
| 245 |
|
| 246 |
=item SetMsgCallback [callback-function] |
| 247 |
|
| 248 |
=item SetBusyCallback [callback-function] |
| 249 |
|
| 250 |
=item SetFileCallback [callback-function] |
| 251 |
|
| 252 |
=item SetFNameFilter [callback-function] |
| 253 |
|
| 254 |
=back |
| 255 |
|
| 256 |
=head2 Call the currently selected FNameFilter |
| 257 |
|
| 258 |
=over |
| 259 |
|
| 260 |
=item $file = FNameFilter $file |
| 261 |
|
| 262 |
=back |
| 263 |
|
| 264 |
=head2 Loading sourcefiles, optionally fuzzy merge and start decoding |
| 265 |
|
| 266 |
=over |
| 267 |
|
| 268 |
=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]] |
| 269 |
|
| 270 |
Load the given file and scan it for encoded contents. Optionally tag it |
| 271 |
with the given id, and if C<$delflag> is true, delete the file after it |
| 272 |
is no longer necessary. If you are certain of the part number, you can |
| 273 |
specify it as the last argument. |
| 274 |
|
| 275 |
A better (usually faster) way of doing this is using the C<SetFNameFilter> |
| 276 |
functionality. |
| 277 |
|
| 278 |
=item $retval = Smerge $pass |
| 279 |
|
| 280 |
If you are desperate, try to call C<Smerge> with increasing C<$pass> |
| 281 |
values, beginning at C<0>, to try to merge parts that usually would not |
| 282 |
have been merged. |
| 283 |
|
| 284 |
Most probably this will result in garbled files, so never do this by |
| 285 |
default, except: |
| 286 |
|
| 287 |
If the C<OPT_AUTOCHECK> option has been disabled (by default it is |
| 288 |
enabled) to speed up file loading, then you I<have> to call C<Smerge -1> |
| 289 |
after loading all files as an additional pre-pass (which is normally done |
| 290 |
by C<LoadFile>). |
| 291 |
|
| 292 |
=item $item = GetFileListItem $item_number |
| 293 |
|
| 294 |
Return the C<$item> structure for the C<$item_number>'th found file, or |
| 295 |
C<undef> if no file with that number exists. |
| 296 |
|
| 297 |
The first file has number C<0>, and the series has no holes, so you can |
| 298 |
iterate over all files by starting with zero and incrementing until you |
| 299 |
hit C<undef>. |
| 300 |
|
| 301 |
This function has to walk the linear list of files on each access, so |
| 302 |
if you want to iterate over all items, it is usually faster to use |
| 303 |
C<GetFileList>. |
| 304 |
|
| 305 |
=item @items = GetFileList |
| 306 |
|
| 307 |
Similar to C<GetFileListItem>, but returns all files in one go, which is |
| 308 |
very much faster for a large number of items, and has no drawbacks when used |
| 309 |
for a small number of items. |
| 310 |
|
| 311 |
=back |
| 312 |
|
| 313 |
=head2 Decoding files |
| 314 |
|
| 315 |
=over |
| 316 |
|
| 317 |
=item $retval = $item->rename ($newname) |
| 318 |
|
| 319 |
Change the ondisk filename where the decoded file will be saved. |
| 320 |
|
| 321 |
=item $retval = $item->decode_temp |
| 322 |
|
| 323 |
Decode the file into a temporary location, use C<< $item->infile >> to |
| 324 |
retrieve the temporary filename. |
| 325 |
|
| 326 |
=item $retval = $item->remove_temp |
| 327 |
|
| 328 |
Remove the temporarily decoded file again. |
| 329 |
|
| 330 |
=item $retval = $item->decode ([$target_path]) |
| 331 |
|
| 332 |
Decode the file to its destination, or the given target path. |
| 333 |
|
| 334 |
=item $retval = $item->info (callback-function) |
| 335 |
|
| 336 |
=back |
| 337 |
|
| 338 |
=head2 Querying (and setting) item attributes |
| 339 |
|
| 340 |
=over |
| 341 |
|
| 342 |
=item $state = $item->state |
| 343 |
|
| 344 |
=item $mode = $item->mode ([newmode]) |
| 345 |
|
| 346 |
=item $uudet = $item->uudet |
| 347 |
|
| 348 |
=item $size = $item->size |
| 349 |
|
| 350 |
=item $filename = $item->filename ([newfilename]) |
| 351 |
|
| 352 |
=item $subfname = $item->subfname |
| 353 |
|
| 354 |
=item $mimeid = $item->mimeid |
| 355 |
|
| 356 |
=item $mimetype = $item->mimetype |
| 357 |
|
| 358 |
=item $binfile = $item->binfile |
| 359 |
|
| 360 |
=back |
| 361 |
|
| 362 |
=head2 Information about source parts |
| 363 |
|
| 364 |
=over |
| 365 |
|
| 366 |
=item $parts = $item->parts |
| 367 |
|
| 368 |
Return information about all parts (source files) used to decode the file |
| 369 |
as a list of hashrefs with the following structure: |
| 370 |
|
| 371 |
{ |
| 372 |
partno => <integer describing the part number, starting with 1>, |
| 373 |
# the following members only exist when they contain useful information |
| 374 |
sfname => <local pathname of the file where this part is from>, |
| 375 |
filename => <the ondisk filename of the decoded file>, |
| 376 |
subfname => <used to cluster postings, possibly the posting filename>, |
| 377 |
subject => <the subject of the posting/mail>, |
| 378 |
origin => <the possible source (From) address>, |
| 379 |
mimetype => <the possible mimetype of the decoded file>, |
| 380 |
mimeid => <the id part of the Content-Type>, |
| 381 |
} |
| 382 |
|
| 383 |
Usually you are mostly interested in the C<sfname> and possibly the C<partno> |
| 384 |
and C<filename> members. |
| 385 |
|
| 386 |
=back |
| 387 |
|
| 388 |
=head2 Functions below are not documented and not very well tested - feedback welcome |
| 389 |
|
| 390 |
QuickDecode |
| 391 |
EncodeMulti |
| 392 |
EncodePartial |
| 393 |
EncodeToStream |
| 394 |
EncodeToFile |
| 395 |
E_PrepSingle |
| 396 |
E_PrepPartial |
| 397 |
|
| 398 |
=head2 EXTENSION FUNCTIONS |
| 399 |
|
| 400 |
Functions found in this module but not documented in the uulib documentation: |
| 401 |
|
| 402 |
=over |
| 403 |
|
| 404 |
=item $msg = straction ACT_xxx |
| 405 |
|
| 406 |
Return a human readable string representing the given action code. |
| 407 |
|
| 408 |
=item $msg = strerror RET_xxx |
| 409 |
|
| 410 |
Return a human readable string representing the given error code. |
| 411 |
|
| 412 |
=item $str = strencoding xxx_ENCODED |
| 413 |
|
| 414 |
Return the name of the encoding type as a string. |
| 415 |
|
| 416 |
=item $str = strmsglevel MSG_xxx |
| 417 |
|
| 418 |
Returns the message level as a string. |
| 419 |
|
| 420 |
=item SetFileNameCallback $cb |
| 421 |
|
| 422 |
Sets (or queries) the FileNameCallback, which is called whenever the |
| 423 |
decoding library can't find a filename and wants to extract a filename |
| 424 |
from the subject line of a posting. The callback will be called with |
| 425 |
two arguments, the subject line and the current candidate for the |
| 426 |
filename. The latter argument can be C<undef>, which means that no |
| 427 |
filename could be found (and likely no one exists, so it is safe to also |
| 428 |
return C<undef> in this case). If it doesn't return anything (not even |
| 429 |
C<undef>!), then nothing happens, so this is a no-op callback: |
| 430 |
|
| 431 |
sub cb { |
| 432 |
return (); |
| 433 |
} |
| 434 |
|
| 435 |
If it returns C<undef>, then this indicates that no filename could be |
| 436 |
found. In all other cases, the return value is taken to be the filename. |
| 437 |
|
| 438 |
This is a slightly more useful callback: |
| 439 |
|
| 440 |
sub cb { |
| 441 |
return unless $_[1]; # skip "Re:"-plies et al. |
| 442 |
my ($subject, $filename) = @_; |
| 443 |
# if we find some *.rar, take it |
| 444 |
return $1 if $subject =~ /(\w+\.rar)/; |
| 445 |
# otherwise just pass what we have |
| 446 |
return (); |
| 447 |
} |
| 448 |
|
| 449 |
=back |
| 450 |
|
| 451 |
=head1 LARGE EXAMPLE DECODER |
| 452 |
|
| 453 |
The general workflow for decoding is like this: |
| 454 |
|
| 455 |
=over |
| 456 |
|
| 457 |
=item 1. Configure options with C<SetOption> or C<SetXXXCallback>. |
| 458 |
|
| 459 |
=item 2. Load all source files with C<LoadFile>. |
| 460 |
|
| 461 |
=item 3. Optionally C<Smerge>. |
| 462 |
|
| 463 |
=item 4. Iterate over all C<GetFileList> items (i.e. result files). |
| 464 |
|
| 465 |
=item 5. C<CleanUp> to delete files and free items. |
| 466 |
|
| 467 |
=back |
| 468 |
|
| 469 |
What follows is the file C<example-decoder> from the distribution that |
| 470 |
illustrates the above workflow in a non-trivial example. |
| 471 |
|
| 472 |
#!/usr/bin/perl |
| 473 |
|
| 474 |
# decode all the files in the directory uusrc/ and copy |
| 475 |
# the resulting files to uudst/ |
| 476 |
|
| 477 |
use Convert::UUlib ':all'; |
| 478 |
|
| 479 |
sub namefilter { |
| 480 |
my ($path) = @_; |
| 481 |
|
| 482 |
$path=~s/^.*[\/\\]//; |
| 483 |
|
| 484 |
$path |
| 485 |
} |
| 486 |
|
| 487 |
sub busycb { |
| 488 |
my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_; |
| 489 |
$_[0]=straction($action); |
| 490 |
print "busy_callback(", (join ",",@_), ")\n"; |
| 491 |
0 |
| 492 |
} |
| 493 |
|
| 494 |
SetOption OPT_RBUF, 128*1024; |
| 495 |
SetOption OPT_WBUF, 1024*1024; |
| 496 |
SetOption OPT_IGNMODE, 1; |
| 497 |
SetOption OPT_IGNMODE, 1; |
| 498 |
SetOption OPT_VERBOSE, 1; |
| 499 |
SetOption OPT_AUTOCHECK, 0; |
| 500 |
|
| 501 |
# show the three ways you can set callback functions. I normally |
| 502 |
# prefer the one with the sub inplace. |
| 503 |
SetFNameFilter \&namefilter; |
| 504 |
|
| 505 |
SetBusyCallback "busycb", 333; |
| 506 |
|
| 507 |
SetMsgCallback sub { |
| 508 |
my ($msg, $level) = @_; |
| 509 |
print uc strmsglevel $_[1], ": $msg\n"; |
| 510 |
}; |
| 511 |
|
| 512 |
# the following non-trivial FileNameCallback takes care |
| 513 |
# of some subject lines not detected properly by uulib: |
| 514 |
SetFileNameCallback sub { |
| 515 |
return unless $_[1]; # skip "Re:"-plies et al. |
| 516 |
local $_ = $_[0]; |
| 517 |
|
| 518 |
# the following rules are rather effective on some newsgroups, |
| 519 |
# like alt.binaries.games.anime, where non-mime, uuencoded data |
| 520 |
# is very common |
| 521 |
|
| 522 |
# if we find some *.rar, take it as the filename |
| 523 |
return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i; |
| 524 |
|
| 525 |
# one common subject format |
| 526 |
return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i; |
| 527 |
|
| 528 |
# - filename.par (04/55) |
| 529 |
return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i; |
| 530 |
|
| 531 |
# - (xxx) No. 1 sayuri81.jpg 756565 bytes |
| 532 |
# - (20 files) No.17 Roseanne.jpg [2/2] |
| 533 |
return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/; |
| 534 |
|
| 535 |
# try to detect some common forms of filenames |
| 536 |
return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i; |
| 537 |
|
| 538 |
# otherwise just pass what we have |
| 539 |
() |
| 540 |
}; |
| 541 |
|
| 542 |
# now read all files in the directory uusrc/* |
| 543 |
for (<uusrc/*>) { |
| 544 |
my ($retval, $count) = LoadFile ($_, $_, 1); |
| 545 |
print "file($_), status(", strerror $retval, ") parts($count)\n"; |
| 546 |
} |
| 547 |
|
| 548 |
Smerge -1; |
| 549 |
|
| 550 |
SetOption OPT_SAVEPATH, "uudst/"; |
| 551 |
|
| 552 |
# now wade through all files and their source parts |
| 553 |
for my $uu (GetFileList) { |
| 554 |
print "file ", $uu->filename, "\n"; |
| 555 |
print " state ", $uu->state, "\n"; |
| 556 |
print " mode ", $uu->mode, "\n"; |
| 557 |
print " uudet ", strencoding $uu->uudet, "\n"; |
| 558 |
print " size ", $uu->size, "\n"; |
| 559 |
print " subfname ", $uu->subfname, "\n"; |
| 560 |
print " mimeid ", $uu->mimeid, "\n"; |
| 561 |
print " mimetype ", $uu->mimetype, "\n"; |
| 562 |
|
| 563 |
# print additional info about all parts |
| 564 |
print " parts"; |
| 565 |
for ($uu->parts) { |
| 566 |
for my $k (sort keys %$_) { |
| 567 |
print " $k=$_->{$k}"; |
| 568 |
} |
| 569 |
print "\n"; |
| 570 |
} |
| 571 |
|
| 572 |
$uu->remove_temp; |
| 573 |
|
| 574 |
if (my $err = $uu->decode) { |
| 575 |
print " ERROR ", strerror $err, "\n"; |
| 576 |
} else { |
| 577 |
print " successfully saved as uudst/", $uu->filename, "\n"; |
| 578 |
} |
| 579 |
} |
| 580 |
|
| 581 |
print "cleanup...\n"; |
| 582 |
|
| 583 |
CleanUp; |
| 584 |
|
| 585 |
=head1 PERLMULTICORE SUPPORT |
| 586 |
|
| 587 |
This module supports the perlmulticore standard (see |
| 588 |
L<http://perlmulticore.schmorp.de/> for more info) for the following |
| 589 |
functions - generally these are functions accessing the disk and/or using |
| 590 |
considerable CPU time: |
| 591 |
|
| 592 |
LoadFile |
| 593 |
$item->decode |
| 594 |
$item->decode_temp |
| 595 |
$item->remove_temp |
| 596 |
$item->info |
| 597 |
|
| 598 |
The perl interpreter will be reacquired/released on every callback |
| 599 |
invocation, so for performance reasons, callbacks should be avoided if |
| 600 |
that is costly. |
| 601 |
|
| 602 |
Future versions might enable multicore support for more functions. |
| 603 |
|
| 604 |
=head1 BUGS AND LIMITATIONS |
| 605 |
|
| 606 |
The original uulib library this module uses was written at a time where |
| 607 |
main memory was measured in megabytes and buffer overflows as a security |
| 608 |
thing didn't exist. While a lot of security fixes have been applied over |
| 609 |
the years (including some defense-in-depth mechanisms that can shield |
| 610 |
against a lot of as-of-yet undetected bugs), using this library for |
| 611 |
security purposes requires care. |
| 612 |
|
| 613 |
Likewise, file sizes when the uulib library was written were tiny compared |
| 614 |
to today, so do not expect this library to handle files larger than 2GB, |
| 615 |
certainly not on a 32 bit host. |
| 616 |
|
| 617 |
Lastly, this module uses a very "C-like" interface, which means it doesn't |
| 618 |
protect you from invalid pointers as you might expect from "more perlish" |
| 619 |
modules - for example, accessing a file item object after calling |
| 620 |
C<CleanUp> will likely result in crashes, memory corruption, or worse. |
| 621 |
|
| 622 |
=head1 AUTHOR |
| 623 |
|
| 624 |
Marc Lehmann <schmorp@schmorp.de>, the original uulib library was written |
| 625 |
by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily |
| 626 |
bugfixed by Marc Lehmann. |
| 627 |
|
| 628 |
=head1 SEE ALSO |
| 629 |
|
| 630 |
perl(1), uudeview homepage at L<http://www.fpx.de/fp/Software/UUDeview/>. |
| 631 |
|
| 632 |
=cut |
| 633 |
|