1 |
=head1 NAME |
2 |
|
3 |
Perl::LibExtractor - determine perl library subsets for building distributions |
4 |
|
5 |
=head1 SYNOPSIS |
6 |
|
7 |
use Perl::LibExtractor; |
8 |
|
9 |
=head1 DESCRIPTION |
10 |
|
11 |
The purpose of this module is to determine subsets of your perl library, |
12 |
that is, a set of files needed to satisfy certain dependencies (e.g. of a |
13 |
program). |
14 |
|
15 |
The goal is to extract a part of your perl installation including |
16 |
dependencies. A typical use case for this module would be to find out |
17 |
which files are needed to be build a L<PAR> distribution, to link into |
18 |
an L<App::Staticperl> binary, or to pack with L<Urlader>, to create |
19 |
stand-alone distributions tailormade to run your app. |
20 |
|
21 |
=head1 METHODS |
22 |
|
23 |
To use this module, first call the C<new>-constructor and then as many |
24 |
other methods as you want, to generate a set of files. Then query the set |
25 |
of files and do whatever you want with them. |
26 |
|
27 |
The command-line utility F<perl-libextractor> can be a convenient |
28 |
alternative to using this module directly, and offers a few extra options, |
29 |
such as to copy out the files into a new directory, strip them and/or |
30 |
manipulate them in other ways. |
31 |
|
32 |
=cut |
33 |
|
34 |
package Perl::LibExtractor; |
35 |
|
36 |
our $VERSION = '1.1'; |
37 |
|
38 |
use Config; |
39 |
use File::Spec (); |
40 |
use File::Temp (); |
41 |
|
42 |
use common::sense; |
43 |
|
44 |
sub I_SRC () { 0 } |
45 |
sub I_DEP () { 1 } |
46 |
|
47 |
sub croak($) { |
48 |
require Carp; |
49 |
Carp::croak "(Perl::LibExtractor) $_[0]"; |
50 |
} |
51 |
|
52 |
my $canonpath = File::Spec->can ("canonpath"); |
53 |
my $case_tolerant = File::Spec->case_tolerant; |
54 |
|
55 |
sub canonpath($) { |
56 |
local $_ = $canonpath->(File::Spec::, $_[0]); |
57 |
s%\\%/%g; |
58 |
# $_ = lc if $case_tolerant; # we assume perl file name case is always the same |
59 |
$_ |
60 |
} |
61 |
|
62 |
=head2 CREATION |
63 |
|
64 |
=over 4 |
65 |
|
66 |
=item $extractor = new Perl::LibExtractor [key => value...] |
67 |
|
68 |
Creates a new extractor object. Each extractor object stores some |
69 |
configuration options and a subset of files that can be queried at any |
70 |
time,. |
71 |
|
72 |
Binary executables (such as the perl interpreter) are stored inside |
73 |
F<bin/>, perl scripts are stored under F<script/>, perl library files are |
74 |
stored under F<lib/> and shared libraries are stored under F<dll/>. |
75 |
|
76 |
The following key-value pairs exist, with default values as specified. |
77 |
|
78 |
=over 4 |
79 |
|
80 |
=item inc => \@INC without "." |
81 |
|
82 |
An arrayref with paths to perl library directories. The default is |
83 |
C<\@INC>, with F<.> removed. |
84 |
|
85 |
To prepend custom dirs just do this: |
86 |
|
87 |
inc => ["mydir", @INC], |
88 |
|
89 |
=item use_packlist => 1 |
90 |
|
91 |
Enable (if true) or disable the use of C<.packlist> files. If enabled, |
92 |
then each time a file is traced, the complete distribution that contains |
93 |
it is included (but not traced). |
94 |
|
95 |
If disabled, only shared objects and autoload files will be added. |
96 |
|
97 |
Debian GNU/Linux doesn't completely package perl or any perl modules, so |
98 |
this option will fail. Other perls should be fine. |
99 |
|
100 |
=item extra_deps => { file => [files...] } |
101 |
|
102 |
Some (mainly runtime dependencies in the perl core library) cannot be |
103 |
detected automatically by this module, especially if you don't use |
104 |
packlists and C<add_core>. |
105 |
|
106 |
This module comes with a set of default dependencies (such as L<Carp> |
107 |
requiring L<Carp::Heavy>), which you cna override with this parameter. |
108 |
|
109 |
To see the default set of dependencies that come with this module, use |
110 |
this: |
111 |
|
112 |
perl -MPerl::LibExtractor -MData::Dumper -e 'print Dumper $Perl::LibExtractor::EXTRA_DEPS' |
113 |
|
114 |
=back |
115 |
|
116 |
=cut |
117 |
|
118 |
our $EXTRA_DEPS = { |
119 |
'bytes.pm' => ['bytes_heavy.pl'], |
120 |
'utf8.pm' => ['utf8_heavy.pl'], |
121 |
'Config.pm' => ['Config_heavy.pl', 'Config_git.pl'], |
122 |
'Carp.pm' => ['Carp/Heavy.pm'], |
123 |
'Exporter.pm' => ['Exporter/Heavy.pm'], |
124 |
}; |
125 |
|
126 |
sub new { |
127 |
my ($class, %kv) = @_; |
128 |
|
129 |
my $self = bless { |
130 |
inc => [grep $_ ne ".", @INC], |
131 |
use_packlist => 1, |
132 |
extra_deps => $EXTRA_DEPS, |
133 |
%kv, |
134 |
set => {}, |
135 |
}, $class; |
136 |
|
137 |
my %inc_seen; |
138 |
my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} }; |
139 |
$self->{inc} = \@inc; |
140 |
|
141 |
# maybe not inc, but these? |
142 |
# sitearchexp |
143 |
# sitelib |
144 |
# vendorarchexp |
145 |
# vendorlibexp |
146 |
# archlibexp |
147 |
# privlibexp |
148 |
|
149 |
$self->_set_inc; |
150 |
|
151 |
$self |
152 |
} |
153 |
|
154 |
sub _perl_path() { |
155 |
my $secure_perl_path = $Config{perlpath}; |
156 |
|
157 |
if ($^O ne 'VMS') { |
158 |
$secure_perl_path .= $Config{_exe} |
159 |
unless $secure_perl_path =~ m/$Config{_exe}$/i; |
160 |
} |
161 |
|
162 |
$secure_perl_path |
163 |
} |
164 |
|
165 |
sub _path2match { |
166 |
my $re = join "|", map "\Q$_", @_; |
167 |
|
168 |
$re = "^(?:$re)\\/"; |
169 |
$re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed |
170 |
|
171 |
$case_tolerant |
172 |
? qr<$re>i |
173 |
: qr<$re> |
174 |
} |
175 |
|
176 |
sub _read_packlist { |
177 |
my ($self, $path) = @_; |
178 |
|
179 |
my $pfxmatch = $self->{pfxmatch}; |
180 |
my $lib = $self->{lib}; |
181 |
|
182 |
my @packlist; |
183 |
|
184 |
open my $fh, "<:perlio", $path |
185 |
or die "$path: $!"; |
186 |
|
187 |
while (<$fh>) { |
188 |
chomp; |
189 |
s/ .*$//; # newer-style .packlists might contain key=value pairs |
190 |
|
191 |
s%\\%/%g; # we only do unix-style paths internally |
192 |
|
193 |
s/$pfxmatch// and exists $lib->{$_} |
194 |
or next; |
195 |
|
196 |
push @packlist, canonpath $_; |
197 |
} |
198 |
|
199 |
\@packlist |
200 |
} |
201 |
|
202 |
sub _set_inc { |
203 |
my ($self) = @_; |
204 |
|
205 |
my $pfxmatch = _path2match @{ $self->{inc }}; |
206 |
|
207 |
my %lib; |
208 |
my @packlists; |
209 |
|
210 |
# find all files in all libdirs, earlier ones overwrite later ones |
211 |
my @scan = map [$_, ""], @{ $self->{inc} }; |
212 |
|
213 |
while (@scan) { |
214 |
my ($root, $dir) = @{ pop @scan }; |
215 |
|
216 |
my $pfx = length $dir ? "$dir/" : ""; |
217 |
|
218 |
for (do { |
219 |
opendir my $fh, "$root/$dir" |
220 |
or croak "$root/$dir: $!"; |
221 |
grep !/^\.\.?$/, readdir $fh |
222 |
}) { |
223 |
if (-d "$root/$dir/$_/.") { |
224 |
$lib{"$pfx$_/"} = "$root/$pfx$_"; |
225 |
push @scan, [$root, "$pfx$_"]; |
226 |
} elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) { |
227 |
push @packlists, "$root/$pfx.packlist"; |
228 |
} elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") { |
229 |
# skip empty .bs files |
230 |
# } elsif (/\.(?:pod|h|html)$/) { |
231 |
# # not interested in those |
232 |
} else { |
233 |
#push @files, $_; |
234 |
$lib{"$pfx$_"} = "$root/$pfx$_"; |
235 |
} |
236 |
} |
237 |
|
238 |
#$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite |
239 |
} |
240 |
|
241 |
$self->{lib} = \%lib; |
242 |
$self->{pfxmatch} = $pfxmatch; |
243 |
|
244 |
my %packlist; |
245 |
|
246 |
# need to go forward here |
247 |
for (@packlists) { |
248 |
my $packlist = $self->_read_packlist ($_); |
249 |
|
250 |
$packlist{$_} = $packlist |
251 |
for @$packlist; |
252 |
} |
253 |
|
254 |
$self->{packlist} = \%packlist; |
255 |
} |
256 |
|
257 |
=back |
258 |
|
259 |
=head2 TRACE/PACKLIST BASED ADDING |
260 |
|
261 |
The following methods add various things to the set of files. |
262 |
|
263 |
Each time a perl file is added, it is scanned by tracing either loading, |
264 |
execution or compiling it, and seeing which other perl modules and |
265 |
libraries have been loaded. |
266 |
|
267 |
For each library file found this way, additional dependencies are added: |
268 |
if packlists are enabled, then all files of the distribution that contains |
269 |
the file will be added. If packlists are disabled, then only shared |
270 |
objects and autoload files for modules will be added. |
271 |
|
272 |
Only files from perl library directories will be added automatically. Any |
273 |
other files (such as manpages or scripts installed in the F<bin> |
274 |
directory) are skipped. |
275 |
|
276 |
If there is an error, such as a module not being found, then this module |
277 |
croaks (as opposed to silently skipping). If you want to add something of |
278 |
which you are not sure it exists, then you can wrap the call into C<eval |
279 |
{}>. In some cases, you can avoid this by executing the code you want |
280 |
to work later using C<add_eval> - see C<add_core_support> for an actual |
281 |
example of this technique. |
282 |
|
283 |
Note that packlists are meant to add files not covered by other |
284 |
mechanisms, such as resource files and other data files loaded directly by |
285 |
a module - they are not meant to add dependencies that are missed because |
286 |
they only happen at runtime. |
287 |
|
288 |
For example, with packlists, when using L<AnyEvent>, then all event loop |
289 |
backends are automatically added as well, but I<not> any event loops |
290 |
(i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without |
291 |
packlists, only the backend that is being used is added (i.e. normally |
292 |
none, as loading AnyEvent does not instantly load any backend). |
293 |
|
294 |
To catch the extra event loop dependencies, you can either initialise |
295 |
AnyEvent so it picks a suitable backend: |
296 |
|
297 |
$extractor->add_eval ("use AnyEvent; AnyEvent::detect"); |
298 |
|
299 |
Or you can directly load the backend modules you plan to use: |
300 |
|
301 |
$extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl"); |
302 |
|
303 |
An example of a program (or module) that has extra resource files is |
304 |
L<Deliantra::Client> - the normal tracing (without packlist usage) will |
305 |
correctly add all submodules, but miss the fonts and textures. By using |
306 |
the packlist, those files are added correctly. |
307 |
|
308 |
=over 4 |
309 |
|
310 |
=cut |
311 |
|
312 |
sub _add { |
313 |
my ($self, $add) = @_; |
314 |
|
315 |
my $lib = $self->{lib}; |
316 |
my $path; |
317 |
|
318 |
for (@$add) { |
319 |
$path = "lib/$_"; |
320 |
|
321 |
$self->{set}{$path} ||= do { |
322 |
my @info; |
323 |
|
324 |
$info[I_SRC] = $lib->{$_} |
325 |
or croak "$_: unable to locate file in perl library"; |
326 |
|
327 |
if ($self->{use_packlist} && exists $self->{packlist}{$_}) { |
328 |
$self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die] |
329 |
for @{ $self->{packlist}{$_} }; |
330 |
|
331 |
# for (grep /\.pm$/, @{ $self->{packlist}{$_} }) { |
332 |
# s/\.pm$//; |
333 |
# s%/%::%g; |
334 |
# my $pkg = "libextractor" . ++$self->{count}; |
335 |
# $self->add_eval ("{ package $pkg; eval 'use $_' }") |
336 |
# unless $self->{_add_do}{$_}++; |
337 |
# } |
338 |
# |
339 |
# $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00") |
340 |
# for grep /\.pl$/, @{ $self->{packlist}{$_} }; |
341 |
|
342 |
} elsif (/^(.*)\.pm$/) { |
343 |
(my $auto = "auto/$1/") =~ s%::%/%g; |
344 |
$auto =~ m%/([^/]+)/$% or die; |
345 |
my $base = $1; |
346 |
|
347 |
if (exists $lib->{$auto}) { |
348 |
# auto dir exists, scan it for cool stuff |
349 |
|
350 |
# 1. shared object, others are of no interest to us |
351 |
my $so = "$auto$base.$Config{dlext}"; |
352 |
if (my $src = $lib->{$so}) { |
353 |
$so = "lib/$so"; |
354 |
push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src]; |
355 |
} |
356 |
|
357 |
# 2. autoloader/autosplit |
358 |
my $ix = "${auto}autosplit.ix"; |
359 |
if (my $src = $lib->{$ix}) { |
360 |
$ix = "lib/$ix"; |
361 |
push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src]; |
362 |
|
363 |
open my $fh, "<:perlio", $src |
364 |
or croak "$src: $!"; |
365 |
|
366 |
my $package; |
367 |
|
368 |
while (<$fh>) { |
369 |
if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) { |
370 |
my $al = "auto/$package/$1.al"; |
371 |
my $src = $lib->{$al} |
372 |
or croak "$al: autoload file not found, but should be there."; |
373 |
|
374 |
$al = "lib/$al"; |
375 |
push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src]; |
376 |
|
377 |
} elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) { |
378 |
($package = $1) =~ s/::/\//g; |
379 |
} elsif (/^\s*(?:#|1?\s*;?\s*$)/) { |
380 |
# nop |
381 |
} else { |
382 |
warn "WARNING: $src: unparsable line, please report: $_"; |
383 |
} |
384 |
} |
385 |
} |
386 |
|
387 |
skip: |
388 |
} |
389 |
} |
390 |
|
391 |
if (exists $self->{extra_deps}{$_}) { |
392 |
# we require it again, because many extra dependencies require the main module to be loaded |
393 |
$self->add_eval ("require q\x00$_\x00"); |
394 |
|
395 |
exists $lib->{$_} and $self->add_require ($_) |
396 |
for @{ $self->{extra_deps}{$_} }; |
397 |
} |
398 |
|
399 |
\@info |
400 |
}; |
401 |
} |
402 |
} |
403 |
|
404 |
sub _trace { |
405 |
my ($self, $file, $eval) = @_; |
406 |
|
407 |
$self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n"; |
408 |
} |
409 |
|
410 |
sub _trace_flush { |
411 |
my ($self) = @_; |
412 |
|
413 |
# ->_add might add additional files to trace |
414 |
while (exists $self->{trace_begin} or exists $self->{trace_check}) { |
415 |
my $tmpdir = newdir File::Temp; |
416 |
my $dir = $tmpdir->dirname; |
417 |
|
418 |
open my $fh, ">:perlio", "$dir/eval" |
419 |
or croak "$dir/eval: $!"; |
420 |
syswrite $fh, |
421 |
'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n" |
422 |
. "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n" |
423 |
. 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n" |
424 |
. "CHECK {\n" |
425 |
. 'open STDOUT, ">:raw", "out" or die "out: $!";' |
426 |
. 'print join "\x00", values %INC;' |
427 |
. 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl |
428 |
. "}\n" |
429 |
. (delete $self->{trace_check}); |
430 |
close $fh; |
431 |
|
432 |
system _perl_path, "-c", "$dir/eval" |
433 |
and croak "trace failure, check trace process output - caught"; |
434 |
|
435 |
my @inc = split /\x00/, do { |
436 |
open my $fh, "<:perlio", "$dir/out" |
437 |
or croak "$dir/out: $!"; |
438 |
local $/; |
439 |
scalar readline $fh |
440 |
}; |
441 |
|
442 |
my $pfxmatch = $self->{pfxmatch}; |
443 |
|
444 |
# remove the library directory prefix, hope for the best |
445 |
s/$pfxmatch// |
446 |
or croak "$_: file outside any library directory" |
447 |
for @inc; |
448 |
|
449 |
$self->_add (\@inc); |
450 |
} |
451 |
} |
452 |
|
453 |
=item $extractor->add_mod ($module[, $module...]) |
454 |
|
455 |
Adds the given module(s) to the file set - the module name must be specified |
456 |
as in C<use>, i.e. with C<::> as separators and without F<.pm>. |
457 |
|
458 |
The program will be loaded with the default import list, any dependent |
459 |
files, such as the shared object implementing xs functions, or autoload |
460 |
files, will also be added. |
461 |
|
462 |
If you want to use a different import list (for those rare modules wghere |
463 |
import lists trigger different backend modules to be loaded for example), |
464 |
you can use C<add_eval> instead: |
465 |
|
466 |
$extractor->add_eval ("use Module qw(a b c)"); |
467 |
|
468 |
Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files |
469 |
from the distribution they are part of. |
470 |
|
471 |
$extractor->add_mod ("Coro", "AnyEvent::AIO"); |
472 |
|
473 |
=cut |
474 |
|
475 |
sub add_mod { |
476 |
my $self = shift; |
477 |
|
478 |
for (@_) { |
479 |
my $pkg = "libextractor" . ++$self->{count}; |
480 |
$self->_trace ("use $_", "{ package $pkg; use $_ }") |
481 |
unless $self->{add_mod}{$_}++; |
482 |
} |
483 |
} |
484 |
|
485 |
=item $extractor->add_require ($name[, $name...]) |
486 |
|
487 |
Works like C<add_mod>, but uses C<require $name> to load the module, i.e. |
488 |
the name must be a filename. |
489 |
|
490 |
Example: load Coro and AnyEvent::AIO, but using C<add_require> instead of C<add_mod>. |
491 |
|
492 |
$extractor->add_require ("Coro.pm", "AnyEvent/AIO.pm"); |
493 |
|
494 |
=cut |
495 |
|
496 |
sub add_require { |
497 |
my $self = shift; |
498 |
|
499 |
for (@_) { |
500 |
$self->add_eval ("require q\x00$_\x00") |
501 |
unless $self->{add_require}{$_}++; |
502 |
} |
503 |
} |
504 |
|
505 |
=item $extractor->add_bin ($name[, $name...]) |
506 |
|
507 |
Adds the given (perl) program(s) to the file set, that is, a program |
508 |
installed by some perl module, written in perl (an example would be the |
509 |
L<perl-libextractor> program that is part of the C<Perl::LibExtractor> |
510 |
distribution). |
511 |
|
512 |
Example: add the deliantra client program installed by the |
513 |
L<Deliantra::Client> module and put it under F<bin/deliantra>. |
514 |
|
515 |
$extractor->add_bin ("deliantra"); |
516 |
|
517 |
=cut |
518 |
|
519 |
sub add_bin { |
520 |
my $self = shift; |
521 |
|
522 |
exe: |
523 |
for my $exe (@_) { |
524 |
for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) { |
525 |
if (open my $fh, "<:perlio", "$dir/$exe") { |
526 |
if (-f $fh) { |
527 |
my $file = do { local $/; readline $fh }; |
528 |
|
529 |
$self->_trace_flush if exists $self->{trace_check}; |
530 |
$self->{trace_check} = $file; |
531 |
|
532 |
$self->{set}{"bin/$exe"} = ["$dir/$exe"]; |
533 |
next exe; |
534 |
} |
535 |
} |
536 |
} |
537 |
|
538 |
croak "add_bin ($exe): executable not found"; |
539 |
} |
540 |
} |
541 |
|
542 |
=item $extractor->add_eval ($string) |
543 |
|
544 |
Evaluates the string as perl code and adds all modules that are loaded |
545 |
by it. For example, this would add L<AnyEvent> and the default backend |
546 |
implementation module and event loop module: |
547 |
|
548 |
$extractor->add_eval ("use AnyEvent; AnyEvent::detect"); |
549 |
|
550 |
Each code snippet will be executed in its own package and under C<use |
551 |
strict>. |
552 |
|
553 |
=cut |
554 |
|
555 |
sub add_eval { |
556 |
my ($self, $eval) = @_; |
557 |
|
558 |
(my $file = substr $eval, 0, 64) =~ s/\015?\012/\\n/g; |
559 |
|
560 |
my $pkg = "libextractor" . ++$self->{count}; |
561 |
$eval =~ s/\x00/\x00."\\x00".q\x00/g; |
562 |
$self->_trace ($file, |
563 |
"local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8 |
564 |
. "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n" |
565 |
); |
566 |
} |
567 |
|
568 |
=back |
569 |
|
570 |
=head2 OTHER METHODS FOR ADDING FILES |
571 |
|
572 |
The following methods add commonly used files that are either not covered |
573 |
by other methods or add commonly-used dependencies. |
574 |
|
575 |
=over 4 |
576 |
|
577 |
=item $extractor->add_perl |
578 |
|
579 |
Adds the perl binary itself to the file set, including the libperl dll, if |
580 |
needed. |
581 |
|
582 |
For example, on UNIX systems, this usually adds a F<exe/perl> and possibly |
583 |
some F<dll/libperl.so.XXX>. |
584 |
|
585 |
=cut |
586 |
|
587 |
sub add_perl { |
588 |
my ($self) = @_; |
589 |
|
590 |
$self->{set}{"exe/perl$Config{_exe}"} = [_perl_path]; |
591 |
|
592 |
# on debian, we have the special case of a perl binary linked against |
593 |
# a static libperl.a (which is not available), but the Config says to use |
594 |
# a shared library, which is in the wrong directory, too (which breaks |
595 |
# every other perl installation on the system - they are so stupid). |
596 |
|
597 |
# that means we can't find the libperl.so, because dbeian actively breaks |
598 |
# their perl install, and we don't need it. we work around this by silently |
599 |
# not including the libperl if we cannot find it. |
600 |
|
601 |
if ($Config{useshrplib} eq "true") { |
602 |
my ($libperl, $libpath); |
603 |
|
604 |
if ($^O eq "cygwin") { |
605 |
$libperl = $Config{libperl}; |
606 |
$libpath = "$Config{binexp}/$libperl"; |
607 |
} elsif ($^O eq "MSWin32") { |
608 |
($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/; |
609 |
$libpath = "$Config{binexp}/$libperl"; |
610 |
} else { |
611 |
$libperl = $Config{libperl}; |
612 |
$libpath = $self->{lib}{"CORE/$libperl"}; |
613 |
} |
614 |
|
615 |
$self->{set}{"dll/$libperl"} = [$libpath] |
616 |
if length $libpath && -e $libpath; |
617 |
} |
618 |
} |
619 |
|
620 |
=item $extractor->add_core_support |
621 |
|
622 |
Try to add modules and files needed to support commonly-used builtin |
623 |
language features. For example to open a scalar for I/O you need the |
624 |
L<PerlIO::scalar> module: |
625 |
|
626 |
open $fh, "<", \$scalar |
627 |
|
628 |
A number of regex and string features (e.g. C<ucfirst>) need some unicore |
629 |
files, e.g.: |
630 |
|
631 |
'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i'; |
632 |
|
633 |
This call adds these files (simply by executing code similar to the above |
634 |
code fragments). |
635 |
|
636 |
Notable things that are missing are other PerlIO layers, such as |
637 |
L<PerlIO::encoding>, and named character and character class matches. |
638 |
|
639 |
=cut |
640 |
|
641 |
sub add_core_support { |
642 |
my ($self) = @_; |
643 |
|
644 |
$self->add_eval (' |
645 |
# PerlIO::Scalar |
646 |
my $v; open my $fh, "<", \$v; |
647 |
|
648 |
# various unicore regex/builtin gambits |
649 |
my $x = chr 1234; |
650 |
"\u$x\U$x\l$x\L$x"; |
651 |
$x =~ /$_$x?/i |
652 |
for qw(\d \w \s \b \R \h \v); |
653 |
split " ", $x; # usually covered by the regex above |
654 |
'); |
655 |
|
656 |
$self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above |
657 |
} |
658 |
|
659 |
=item $extractor->add_unicore |
660 |
|
661 |
Adds (hopefully) all files from the unicore database that will ever be |
662 |
needed. |
663 |
|
664 |
If you are not sure which unicode character classes and similar unicore |
665 |
databases you need, and you do not care about an extra one thousand(!) |
666 |
files comprising 4MB of data, then you can just call this method, which |
667 |
adds basically all files from perl's unicode database. |
668 |
|
669 |
Note that C<add_core_support> also adds some unicore files, but it's not a |
670 |
subset of C<add_unicore> - the former adds all files neccessary to support |
671 |
core builtins (which includes some unicore files and other things), while |
672 |
the latter adds all unicore files (but nothing else). |
673 |
|
674 |
When in doubt, use both. |
675 |
|
676 |
=cut |
677 |
|
678 |
sub add_unicore { |
679 |
my ($self) = @_; |
680 |
|
681 |
$self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]); |
682 |
} |
683 |
|
684 |
=item $extractor->add_core |
685 |
|
686 |
This adds all files from the perl core distribution, that is, all library |
687 |
files that come with perl. |
688 |
|
689 |
This is a superset of C<add_core_support> and C<add_unicore>. |
690 |
|
691 |
This is quite a lot, but on the plus side, you can be sure nothing is |
692 |
missing. |
693 |
|
694 |
This requires a full perl installation - Debian GNU/Linux doesn't package |
695 |
the full perl library, so this function will not work there. |
696 |
|
697 |
=cut |
698 |
|
699 |
sub add_core { |
700 |
my ($self) = @_; |
701 |
|
702 |
my $lib = $self->{lib}; |
703 |
|
704 |
for (@{ |
705 |
$self->_read_packlist (".packlist") |
706 |
}) { |
707 |
$self->{set}{$_} ||= [ |
708 |
"lib/" |
709 |
. ($lib->{$_} or croak "$_: unable to locate file in perl library") |
710 |
]; |
711 |
} |
712 |
} |
713 |
|
714 |
=back |
715 |
|
716 |
=head2 GLOB-BASED ADDING AND FILTERING |
717 |
|
718 |
These methods add or manipulate files by using glob-based patterns. |
719 |
|
720 |
These glob patterns work similarly to glob patterns in the shell: |
721 |
|
722 |
=over 4 |
723 |
|
724 |
=item / |
725 |
|
726 |
A F</> at the start of the pattern interprets the pattern as a file |
727 |
path inside the file set, almost the same as in the shell. For example, |
728 |
F</bin/perl*> would match all files whose names starting with F<perl> |
729 |
inside the F<bin> directory in the set. |
730 |
|
731 |
If the F</> is missing, then the pattern is interpreted as a module name |
732 |
(a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> , |
733 |
while F<Coro::*> would match F<lib/Coro/*.pm>. |
734 |
|
735 |
=item * |
736 |
|
737 |
A single star matches anything inside a single directory component. For |
738 |
example, F</lib/Coro/*.pm> would match all F<.pm> files inside the |
739 |
F<lib/Coro/> directory, but not any files deeper in the hierarchy. |
740 |
|
741 |
Another way to look at it is that a single star matches anything but a |
742 |
slash (F</>). |
743 |
|
744 |
=item ** |
745 |
|
746 |
A double star matches any number of characters in the path, including F</>. |
747 |
|
748 |
For example, F<AnyEvent::**> would match all modules whose names start |
749 |
with C<AnyEvent::>, no matter how deep in the hierarchy they are. |
750 |
|
751 |
=back |
752 |
|
753 |
=cut |
754 |
|
755 |
sub _extglob2re { |
756 |
for (quotemeta $_[1]) { |
757 |
s/\\\*\\\*/.*/g; |
758 |
s/\\\*/[^\/]*/g; |
759 |
s/\\\?/[^\/]/g; |
760 |
|
761 |
unless (s%^\\/%%) { |
762 |
s%\\:\\:%/%g; |
763 |
$_ = "lib/$_\\.pm"; |
764 |
} |
765 |
|
766 |
$_ .= '$'; |
767 |
s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end |
768 |
|
769 |
return qr<^$_>s |
770 |
} |
771 |
} |
772 |
|
773 |
=over 4 |
774 |
|
775 |
=item $extractor->add_glob ($modglob[, $modglob...]) |
776 |
|
777 |
Adds all files from the perl library that match the given glob pattern. |
778 |
|
779 |
For example, you could implement C<add_unicore> yourself like this: |
780 |
|
781 |
$extractor->add_glob ("/unicore/**.pl"); |
782 |
|
783 |
=cut |
784 |
|
785 |
sub add_glob { |
786 |
my $self = shift; |
787 |
|
788 |
for (@_) { |
789 |
my $pat = $self->_extglob2re ($_); |
790 |
$self->_add ([grep /$pat/, keys %{ $self->{lib} }]); |
791 |
} |
792 |
} |
793 |
|
794 |
=item $extractor->filter ($pattern[, $pattern...]) |
795 |
|
796 |
Applies a series of include/exclude filters. Each filter must start with |
797 |
either C<+> or C<->, to designate the pattern as I<include> or I<exclude> |
798 |
pattern. The rest of the pattern is a normal glob pattern. |
799 |
|
800 |
An exclude pattern (C<->) instantly removes all matching files from |
801 |
the set. An include pattern (C<+>) protects matching files from later |
802 |
removals. |
803 |
|
804 |
That is, if you have an include pattern then all files that were matched |
805 |
by it will be included in the set, regardless of any further exclude |
806 |
patterns matching the same files. |
807 |
|
808 |
Likewise, any file excluded by a pattern will not be included in the set, |
809 |
even if matched by later include patterns. |
810 |
|
811 |
Any files not matched by any expression will simply stay in the set. |
812 |
|
813 |
For example, to remove most of the useless autoload functions by the POSIX |
814 |
module (they either do the same thing as a builtin or always raise an |
815 |
error), you would use this: |
816 |
|
817 |
$extractor->filter ("-/lib/auto/POSIX/*.al"); |
818 |
|
819 |
This does not remove all autoload files, only the ones not defined by a |
820 |
subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone). |
821 |
|
822 |
=cut |
823 |
|
824 |
sub filter { |
825 |
my ($self, @patterns) = @_; |
826 |
|
827 |
$self->_trace_flush; |
828 |
|
829 |
my $set = $self->{set}; |
830 |
my %include; |
831 |
|
832 |
for my $pat (@patterns) { |
833 |
$pat =~ s/^([+\-])// |
834 |
or croak "$_: not a valid filter pattern (missing + or - prefix)"; |
835 |
my $inc = $1 eq "+"; |
836 |
$pat = $self->_extglob2re ($pat); |
837 |
|
838 |
my @match = grep /$pat/, keys %$set; |
839 |
|
840 |
if ($inc) { |
841 |
@include{@match} = delete @$set{@match}; |
842 |
} else { |
843 |
delete @$set{@{ $_->[I_DEP] }} # remove dependents |
844 |
for delete @$set{@match}; |
845 |
} |
846 |
} |
847 |
|
848 |
my @include = keys %include; |
849 |
@$set{@include} = delete @include{@include}; |
850 |
} |
851 |
|
852 |
=item $extractor->runtime_only |
853 |
|
854 |
This removes all files that are not needed at runtime, such as static |
855 |
archives, header and other files needed only for compilation of modules, |
856 |
and pod and html files (which are unlikely to be needed at runtime). |
857 |
|
858 |
This is quite useful when you want to have only files actually needed to |
859 |
execute a program. |
860 |
|
861 |
=cut |
862 |
|
863 |
sub runtime_only { |
864 |
my ($self) = @_; |
865 |
|
866 |
$self->_trace_flush; |
867 |
|
868 |
my $set = $self->{set}; |
869 |
|
870 |
# delete all static libraries, also windows stuff |
871 |
delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set }; |
872 |
|
873 |
# delete all extralibs.ld and extralibs.all (no clue what the latter is for) |
874 |
delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set }; |
875 |
|
876 |
# delete all .pod, .h, .html files (hopefully none of them are used at runtime) |
877 |
delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set }; |
878 |
|
879 |
# delete unneeded unicore files |
880 |
delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set }; |
881 |
} |
882 |
|
883 |
=back |
884 |
|
885 |
=head2 RESULT SET |
886 |
|
887 |
=over 4 |
888 |
|
889 |
=item $set = $extractor->set |
890 |
|
891 |
Returns a hash reference that represents the result set. The hash is the |
892 |
actual internal storage hash and can only be modified as described below. |
893 |
|
894 |
Each key in the hash is the path inside the set, without a leading slash, |
895 |
e.g.: |
896 |
|
897 |
bin/perl |
898 |
lib/unicore/lib/Blk/Superscr.pl |
899 |
lib/AnyEvent/Impl/EV.pm |
900 |
|
901 |
The value is an array reference with mostly unspecified contents, except |
902 |
the first element, which is the file system path where the actual file can |
903 |
be found. |
904 |
|
905 |
This code snippet lists all files inside the set: |
906 |
|
907 |
print "$_\n" |
908 |
for sort keys %{ $extractor->set }); |
909 |
|
910 |
This code fragment prints C<< filesystem_path => set_path >> pairs for all |
911 |
files in the set: |
912 |
|
913 |
my $set = $extractor->set; |
914 |
while (my ($set,$fspath) = each %$set) { |
915 |
print "$fspath => $set\n"; |
916 |
} |
917 |
|
918 |
You can implement your own filtering by asking for the result set with |
919 |
C<< $extractor->set >>, and then deleting keys from the referenced hash |
920 |
- since you can ask for the result set at any time you can add things, |
921 |
filter them out this way, and add additional things. |
922 |
|
923 |
=back |
924 |
|
925 |
=cut |
926 |
|
927 |
sub set { |
928 |
$_[0]->_trace_flush; |
929 |
$_[0]{set} |
930 |
} |
931 |
|
932 |
=head1 EXAMPLE |
933 |
|
934 |
To package he deliantra client (L<Deliantra::Client>), finding all |
935 |
(perl) files needed to run it is a first step. This can be done by using |
936 |
something like the following code snippet: |
937 |
|
938 |
my $ex = new Perl::LibExtractor; |
939 |
|
940 |
$ex->add_perl; |
941 |
$ex->add_core_support; |
942 |
$ex->add_bin ("deliantra"); |
943 |
$ex->add_mod ("AnyEvent::Impl::EV"); |
944 |
$ex->add_mod ("AnyEvent::Impl::Perl"); |
945 |
$ex->add_mod ("Urlader"); |
946 |
$ex->filter ("-/*/auto/POSIX/**.al"); |
947 |
$ex->runtime_only; |
948 |
|
949 |
First it sets the perl library directory to F<pm> and F<.> (the latter |
950 |
to work around some AutoLoader bugs), so perl uses only the perl library |
951 |
files that came with the binary package. |
952 |
|
953 |
Then it sets some environment variable to override the system default |
954 |
(which might be incompatible). |
955 |
|
956 |
Then it runs the client itself, using C<require>. Since C<require> only |
957 |
looks in the perl library directory this is the reaosn why the scripts |
958 |
were put there (of course, since F<.> is also included it doesn't matter, |
959 |
but I refuse to yield to bugs). |
960 |
|
961 |
Finally it exits with a clean status to signal "ok" to Urlader. |
962 |
|
963 |
Back to the original C<Perl::LibExtractor> script: after initialising a |
964 |
new set, the script simply adds the F<perl> interpreter and core support |
965 |
files (just in case, not all are needed, but some are, and I am too lazy |
966 |
to find out which ones exactly). |
967 |
|
968 |
Then it adds the deliantra executable itself, which in turn adds most of |
969 |
the required modules. After that, the AnyEvent implementation modules are |
970 |
added because these dependencies are not picked up automatically. |
971 |
|
972 |
The L<Urlader> module is added because the client itself does not depend |
973 |
on it at all, but the wrapper does. |
974 |
|
975 |
At this point, all required files are present, and it's time to slim |
976 |
down: most of the ueseless POSIX autoloaded functions are removed, |
977 |
not because they are so big, but because creating files is a costly |
978 |
operation in itself, so even small fiels have considerable overhead when |
979 |
unpacking. Then files not required for running the client are removed. |
980 |
|
981 |
And that concludes it, the set is now ready. |
982 |
|
983 |
=head1 SEE ALSO |
984 |
|
985 |
The utility program that comes with this module: L<perl-libextractor>. |
986 |
|
987 |
L<App::Staticperl>, L<Urlader>, L<Perl::Squish>. |
988 |
|
989 |
=head1 LICENSE |
990 |
|
991 |
This software package is licensed under the GPL version 3 or any later |
992 |
version, see COPYING for details. |
993 |
|
994 |
This license does not, of course, apply to any output generated by this |
995 |
software. |
996 |
|
997 |
=head1 AUTHOR |
998 |
|
999 |
Marc Lehmann <schmorp@schmorp.de> |
1000 |
http://home.schmorp.de/ |
1001 |
|
1002 |
=cut |
1003 |
|
1004 |
1; |
1005 |
|