1 |
=head1 NAME |
2 |
|
3 |
Perl::LibExtractor - determine perl library subsets for building distributions |
4 |
|
5 |
=head1 SYNOPSIS |
6 |
|
7 |
use Perl::LibExtractor; |
8 |
|
9 |
=head1 DESCRIPTION |
10 |
|
11 |
The purpose of this module is to determine subsets of your perl library, |
12 |
that is, a set of files needed to satisfy certain dependencies (e.g. of a |
13 |
program). |
14 |
|
15 |
The goal is to extract a part of your perl installation including |
16 |
dependencies. A typical use case for this module would be to find out |
17 |
which files are needed to be build a L<PAR> distribution, to link into |
18 |
an L<App::Staticperl> binary, or to pack with L<Urlader>, to create |
19 |
stand-alone distributions tailormade to run your app. |
20 |
|
21 |
=head1 METHODS |
22 |
|
23 |
To use this module, first call the C<new>-constructor and then as many |
24 |
other methods as you want, to generate a set of files. Then query the set of |
25 |
files and do whatever you want with them. |
26 |
|
27 |
The command-line utility F<perl-libextract> can be a convenient |
28 |
alternative to using this module directly, and offers a few extra options, |
29 |
such as to copy out the files into a new directory, strip them and/or |
30 |
manipulate them in other ways. |
31 |
|
32 |
=over 4 |
33 |
|
34 |
=cut |
35 |
|
36 |
package Perl::LibExtractor; |
37 |
|
38 |
our $VERSION = '0.1'; |
39 |
|
40 |
use Config; |
41 |
use File::Temp (); |
42 |
|
43 |
use common::sense; |
44 |
|
45 |
sub I_SRC () { 0 } |
46 |
sub I_DEP () { 1 } |
47 |
|
48 |
sub croak($) { |
49 |
require Carp; |
50 |
Carp::croak "(Perl::LibExtractor) $_[0]"; |
51 |
} |
52 |
|
53 |
=item $extractor = new Perl::LibExtractor [key => value...] |
54 |
|
55 |
Creates a new extractor object. Each extractor object stores some |
56 |
configuration options and a subset of files that can be queried at any |
57 |
time,. |
58 |
|
59 |
The following key-value pairs exist, with default values as specified. |
60 |
|
61 |
=over 4 |
62 |
|
63 |
=item exedir => "bin" |
64 |
|
65 |
The prefix to use for the suggested target path for perl |
66 |
executables. Defaults to F<bin>. |
67 |
|
68 |
=item libdir => "lib" |
69 |
|
70 |
The prefix to use for the suggested target path of perl library |
71 |
files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files |
72 |
etc.). Defaults to F<lib>. |
73 |
|
74 |
=item bindir => "bin" |
75 |
|
76 |
The prefix to use for the suggested target path for (non-perl) |
77 |
executables. Defaults to F<bin>. |
78 |
|
79 |
=item dlldir => "bin" |
80 |
|
81 |
The prefix to use for the suggested target path of any shared |
82 |
libraries. Defaults to F<bin>. |
83 |
|
84 |
=item inc => \@INC without "." |
85 |
|
86 |
An arrayref with paths to perl library directories. The default is |
87 |
C<\@INC>, with F<.> removed. |
88 |
|
89 |
To prepend custom dirs just do this: |
90 |
|
91 |
inc => ["mydir", @INC], |
92 |
|
93 |
=item use_packlist => 1 |
94 |
|
95 |
Enable (if true) or disable the use of C<.packlist> files. If enabled, |
96 |
then each time a module is included, the complete distribution that |
97 |
contains it is included (and traced) as well. See L<ALGORITHMS>, below. |
98 |
|
99 |
=back |
100 |
|
101 |
=cut |
102 |
|
103 |
sub new { |
104 |
my ($class, %kv) = @_; |
105 |
|
106 |
my $self = bless { |
107 |
exedir => "bin", |
108 |
libdir => "lib", |
109 |
bindir => "bin", |
110 |
dlldir => "bin", |
111 |
inc => [grep $_ ne ".", @INC], |
112 |
use_packlist => 1, |
113 |
%kv, |
114 |
set => {}, |
115 |
}, $class; |
116 |
|
117 |
my %inc_seen; |
118 |
my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} }; |
119 |
$self->{inc} = \@inc; |
120 |
|
121 |
$self->_set_inc; |
122 |
|
123 |
$self |
124 |
} |
125 |
|
126 |
sub _perl_path() { |
127 |
my $secure_perl_path = $Config{perlpath}; |
128 |
|
129 |
if ($^O ne 'VMS') { |
130 |
$secure_perl_path .= $Config{_exe} |
131 |
unless $secure_perl_path =~ m/$Config{_exe}$/i; |
132 |
} |
133 |
|
134 |
$secure_perl_path |
135 |
} |
136 |
|
137 |
sub _set_inc { |
138 |
my ($self) = @_; |
139 |
|
140 |
my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }}; |
141 |
$matchprefix = qr<^(?:$matchprefix)/>; |
142 |
|
143 |
my %lib; |
144 |
my @packlists; |
145 |
|
146 |
# find all files in all libdirs, earlier ones overwrite later ones |
147 |
my @scan = map [$_, ""], @{ $self->{inc} }; |
148 |
|
149 |
while (@scan) { |
150 |
my ($root, $dir) = @{ pop @scan }; |
151 |
|
152 |
my $pfx = length $dir ? "$dir/" : ""; |
153 |
|
154 |
for (do { |
155 |
opendir my $fh, "$root/$dir" |
156 |
or croak "$root/$dir: $!"; |
157 |
grep !/^\.\.?$/, readdir $fh |
158 |
}) { |
159 |
if (-d "$root/$dir/$_/.") { |
160 |
$lib{"$pfx$_/"} = "$root/$pfx$_"; |
161 |
push @scan, [$root, "$pfx$_"]; |
162 |
} elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) { |
163 |
push @packlists, [$root, $pfx]; |
164 |
} elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") { |
165 |
# skip empty .bs files |
166 |
# } elsif (/\.(?:pod|h|html)$/) { |
167 |
# # not interested in those |
168 |
} else { |
169 |
#push @files, $_; |
170 |
$lib{"$pfx$_"} = "$root/$pfx$_"; |
171 |
} |
172 |
} |
173 |
|
174 |
#$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite |
175 |
} |
176 |
|
177 |
my %packlist; |
178 |
|
179 |
# need to go forward here |
180 |
for (@packlists) { |
181 |
my ($root, $auto) = @$_; |
182 |
|
183 |
my @packlist; |
184 |
|
185 |
open my $fh, "<:perlio", "$root/$auto/.packlist" |
186 |
or die "$root/$auto/.packlist: $!"; |
187 |
|
188 |
$root = qr<^\Q$root/>; |
189 |
|
190 |
while (<$fh>) { |
191 |
chomp; |
192 |
s/ .*$//; # newer-style .packlists might contain key=value pairs |
193 |
s%/\./%/%g; # yeah, these too |
194 |
|
195 |
s/$root// or next; |
196 |
exists $lib{$_} or next; |
197 |
|
198 |
push @packlist, $_; |
199 |
$packlist{$_} = \@packlist; |
200 |
} |
201 |
} |
202 |
|
203 |
$self->{lib} = \%lib; |
204 |
$self->{packlist} = \%packlist; |
205 |
$self->{matchprefix} = $matchprefix; |
206 |
} |
207 |
|
208 |
sub _add { |
209 |
my ($self, $add) = @_; |
210 |
|
211 |
my $lib = $self->{lib}; |
212 |
my $path; |
213 |
|
214 |
for (@$add) { |
215 |
$path = "$self->{libdir}/$_"; |
216 |
|
217 |
$self->{set}{$path} ||= do { |
218 |
my @info; |
219 |
|
220 |
$info[I_SRC] = $lib->{$_} |
221 |
or croak "$_: unable to locate file in perl library"; |
222 |
|
223 |
if ($self->{use_packlist} && exists $self->{packlist}{$_}) { |
224 |
$self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die] |
225 |
for @{ $self->{packlist}{$_} }; |
226 |
|
227 |
# for (grep /\.pm$/, @{ $self->{packlist}{$_} }) { |
228 |
# s/\.pm$//; |
229 |
# s%/%::%g; |
230 |
# my $pkg = "libextractor" . ++$self->{count}; |
231 |
# $self->add_eval ("{ package $pkg; eval 'use $_' }") |
232 |
# unless $self->{_add_do}{$_}++; |
233 |
# } |
234 |
# |
235 |
# $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00") |
236 |
# for grep /\.pl$/, @{ $self->{packlist}{$_} }; |
237 |
|
238 |
} elsif (/^(.*)\.pm$/) { |
239 |
(my $auto = "auto/$1/") =~ s%::%/%g; |
240 |
$auto =~ m%/([^/]+)/$% or die; |
241 |
my $base = $1; |
242 |
|
243 |
if (exists $lib->{$auto}) { |
244 |
# auto dir exists, scan it for cool stuff |
245 |
|
246 |
# 1. shared object, others are of no interest to us |
247 |
my $so = "$auto$base.$Config{dlext}"; |
248 |
if (my $src = $lib->{$so}) { |
249 |
$so = "$self->{libdir}/$so"; |
250 |
push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src]; |
251 |
} |
252 |
|
253 |
# 2. autoloader/autosplit |
254 |
my $ix = "${auto}autosplit.ix"; |
255 |
if (my $src = $lib->{$ix}) { |
256 |
$ix = "$self->{libdir}/$ix"; |
257 |
push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src]; |
258 |
|
259 |
open my $fh, "<:perlio", $src |
260 |
or croak "$src: $!"; |
261 |
|
262 |
my $package; |
263 |
|
264 |
while (<$fh>) { |
265 |
if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) { |
266 |
my $al = "auto/$package/$1.al"; |
267 |
my $src = $lib->{$al} |
268 |
or croak "$al: autoload file not found, but should be there."; |
269 |
|
270 |
$al = "$self->{libdir}/$al"; |
271 |
push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src]; |
272 |
|
273 |
} elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) { |
274 |
($package = $1) =~ s/::/\//g; |
275 |
} elsif (/^\s*(?:#|1?\s*;?\s*$)/) { |
276 |
# nop |
277 |
} else { |
278 |
warn "WARNING: $src: unparsable line, please report: $_"; |
279 |
} |
280 |
} |
281 |
} |
282 |
|
283 |
skip: |
284 |
} |
285 |
} |
286 |
|
287 |
\@info |
288 |
}; |
289 |
} |
290 |
} |
291 |
|
292 |
sub _trace { |
293 |
my ($self, $file, $eval) = @_; |
294 |
|
295 |
$self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n"; |
296 |
} |
297 |
|
298 |
sub _trace_flush { |
299 |
my ($self) = @_; |
300 |
|
301 |
# ->_add might add additional files to trace |
302 |
while (exists $self->{trace_begin} or exists $self->{trace_check}) { |
303 |
my $tmpdir = newdir File::Temp; |
304 |
my $dir = $tmpdir->dirname; |
305 |
|
306 |
open my $fh, ">:perlio", "$dir/eval" |
307 |
or croak "$dir/eval: $!"; |
308 |
syswrite $fh, |
309 |
'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n" |
310 |
. "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n" |
311 |
. 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n" |
312 |
. "CHECK {\n" |
313 |
. 'open STDOUT, ">:raw", "out" or die "out: $!";' |
314 |
. 'print join "\x00", values %INC;' |
315 |
. 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl |
316 |
. "}\n" |
317 |
. (delete $self->{trace_check}); |
318 |
close $fh; |
319 |
|
320 |
system _perl_path, "-c", "$dir/eval" |
321 |
and croak "trace failure, check trace process output - caught"; |
322 |
|
323 |
my @inc = split /\x00/, do { |
324 |
open my $fh, "<:perlio", "$dir/out" |
325 |
or croak "$dir/out: $!"; |
326 |
local $/; |
327 |
scalar readline $fh |
328 |
}; |
329 |
|
330 |
my $matchprefix = $self->{matchprefix}; |
331 |
|
332 |
# remove the library directory prefix, hope for the best |
333 |
s/$matchprefix// |
334 |
or croak "$_: file outside any library directory" |
335 |
for @inc; |
336 |
|
337 |
$self->_add (\@inc); |
338 |
} |
339 |
} |
340 |
|
341 |
=item $extractor->add_mod ($module[, $module...]) |
342 |
|
343 |
Adds the given module(s) to the file set - the module name must be specified |
344 |
as in C<use>, i.e. with C<::> as separators and without F<.pm>. |
345 |
|
346 |
The program will be loaded with the default import list, any dependent |
347 |
files, such as the shared object implementing xs functions, or autoload |
348 |
files, will also be added. |
349 |
|
350 |
Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files |
351 |
from the distribution they are part of. |
352 |
|
353 |
$extractor->add_mod ("Coro", "AnyEvent::AIO"); |
354 |
|
355 |
=cut |
356 |
|
357 |
sub add_mod { |
358 |
my $self = shift; |
359 |
|
360 |
for (@_) { |
361 |
my $pkg = "libextractor" . ++$self->{count}; |
362 |
$self->_trace ("use $_", "{ package $pkg; use $_ }") |
363 |
unless $self->{add_mod}{$_}++; |
364 |
} |
365 |
} |
366 |
|
367 |
=item $extractor->add_exe ($name[, $name...]) |
368 |
|
369 |
Adds the given (perl) program(s) to the file set, that is, a program |
370 |
installed by some perl module, written in perl (an example would be the |
371 |
L<perl-libextract> program that is part of the C<Perl::LibExtractor> |
372 |
distribution). |
373 |
|
374 |
Example: add the deliantra client installed by the L<Deliantra::Client> |
375 |
module. |
376 |
|
377 |
$extractor->add_exe ("deliantra"); |
378 |
|
379 |
=cut |
380 |
|
381 |
sub add_exe { |
382 |
my $self = shift; |
383 |
|
384 |
exe: |
385 |
for my $exe (@_) { |
386 |
for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) { |
387 |
if (open my $fh, "<:perlio", "$dir/$exe") { |
388 |
|
389 |
my $file = do { local $/; readline $fh }; |
390 |
|
391 |
$self->_trace_flush if exists $self->{trace_check}; |
392 |
$self->{trace_check} = $file; |
393 |
|
394 |
$self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"]; |
395 |
next exe; |
396 |
} |
397 |
} |
398 |
|
399 |
croak "add_exe ($exe): executable not found"; |
400 |
} |
401 |
} |
402 |
|
403 |
=item $extractor->add_eval ($string) |
404 |
|
405 |
Evaluates the string and adds all modules that are loaded by it. For |
406 |
example, this would add L<AnyEvent> and the default backend implementation |
407 |
module and event loop module: |
408 |
|
409 |
$extractor->add_eval ("use AnyEvent; AnyEvent::detect"); |
410 |
|
411 |
=cut |
412 |
|
413 |
sub add_eval { |
414 |
my ($self, $eval) = @_; |
415 |
|
416 |
$eval =~ s/\x00/\x00."\\x00".q\x00/g; |
417 |
$self->_trace ($eval, |
418 |
"local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8 |
419 |
. "eval q\x00BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n" |
420 |
); |
421 |
} |
422 |
|
423 |
=item $extractor->add_perl |
424 |
|
425 |
Adds the perl binary itself to the file set, including the libperl dll, if |
426 |
needed. |
427 |
|
428 |
=cut |
429 |
|
430 |
sub add_perl { |
431 |
my ($self) = @_; |
432 |
|
433 |
$self->{set}{"$self->{bindir}/perl$Config{_exe}"} = [_perl_path]; |
434 |
|
435 |
# on debian, we have the special case of a perl binary linked against |
436 |
# a static libperl.a (which is not available), but the Config says to use |
437 |
# a shared library, which is in the wrong directory, too (which breaks |
438 |
# every other perl installation on the system - they are so stupid). |
439 |
|
440 |
# that means we can't find the libperl.so, because dbeian actively breaks |
441 |
# their perl install, and we don't need it. we work around this by silently |
442 |
# not including the libperl if we cannot find it. |
443 |
|
444 |
if ($Config{useshrplib} eq "true") { |
445 |
if (my $libperl = $self->{lib}{"CORE/$Config{libperl}"}) { |
446 |
$self->{set}{"$self->{dlldir}/$Config{libperl}"} = $libperl; |
447 |
} |
448 |
} |
449 |
} |
450 |
|
451 |
=item $extractor->add_core_support |
452 |
|
453 |
Try to add modules and files needed to support commonly-used builtin |
454 |
language features. For example to open a scalar for I/O you need the |
455 |
L<PerlIO::scalar> module: |
456 |
|
457 |
open $fh, "<", \$scalar |
458 |
|
459 |
A number of regex and string features (e.g. C<ucfirst>) need some unicore |
460 |
files, e.g.: |
461 |
|
462 |
'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i'; |
463 |
|
464 |
This call adds these files (simply by executing code similar to the above |
465 |
code fragments). |
466 |
|
467 |
Notable things that are missing are other PerlIO layers, such as |
468 |
L<PerlIO::encoding>, and named character and character class matches. |
469 |
|
470 |
=cut |
471 |
|
472 |
sub add_core_support { |
473 |
my ($self) = @_; |
474 |
|
475 |
$self->add_eval ('my $v; open my $fh, "<", \$v'); |
476 |
$self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i'); |
477 |
} |
478 |
|
479 |
=item $extractor->add_unicore |
480 |
|
481 |
#TODO |
482 |
|
483 |
=item $extractor->add_glob ($modglob[, $modglob...]) |
484 |
|
485 |
#TODO# |
486 |
|
487 |
=item $extractor->filter ($pattern[, $pattern...]) |
488 |
|
489 |
Applies a series of include/exclude filters. Each filter must start |
490 |
with either C<+> or C<->, to designate the pattern as I<include> or |
491 |
I<exclude> pattern. The rest of the pattern is an extended glob pattern |
492 |
(see L<EXTENDED GLOB PATTERNS>). |
493 |
|
494 |
Each pattern is instantly applied, and all matching files will be |
495 |
permanently included or excluded, that is, if you have an include pattern |
496 |
then all files that were matched by it will be included in the set, |
497 |
regardless of any further exclude patterns matching the same files. |
498 |
|
499 |
Likewise, any file excluded by a pattern will not be included in the set, |
500 |
even if matches by later include patterns. |
501 |
|
502 |
Any files not matched by any expression will be included, that is, the |
503 |
filter list has an implicit C<+/**> pattern at the end. |
504 |
|
505 |
=cut |
506 |
|
507 |
sub _extglob2re { |
508 |
my $self = shift; |
509 |
|
510 |
for (quotemeta $_[0]) { |
511 |
s/\\\*\\\*/.*/g; |
512 |
s/\\\*/[^\/]*/g; |
513 |
s/\\\?/[^\/]/g; |
514 |
|
515 |
unless (s%^\\/%%) { |
516 |
s%\\:\\:%/%g; |
517 |
$_ = (quotemeta $self->{libdir}) . "/$_\\.pm"; |
518 |
} |
519 |
|
520 |
$_ .= '$'; |
521 |
s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end |
522 |
|
523 |
return qr<^$_>s |
524 |
} |
525 |
} |
526 |
|
527 |
sub filter { |
528 |
my ($self, @patterns) = @_; |
529 |
|
530 |
$self->_trace_flush; |
531 |
|
532 |
my $set = $self->{set}; |
533 |
my %include; |
534 |
|
535 |
for my $pat (@patterns) { |
536 |
$pat =~ s/^([+\-])// |
537 |
or croak "$_: not a valid filter pattern (missing + or - prefix)"; |
538 |
my $inc = $1 eq "+"; |
539 |
$pat = $self->_extglob2re ($pat); |
540 |
|
541 |
my @match = grep /$pat/, keys %$set; |
542 |
|
543 |
if ($inc) { |
544 |
@include{@match} = delete @$set{@match}; |
545 |
} else { |
546 |
delete @$set{@{ $_->[I_DEP] }} # remove dependents |
547 |
for delete @$set{@match}; |
548 |
} |
549 |
} |
550 |
|
551 |
my @include = keys %include; |
552 |
@$set{@include} = delete @include{@include}; |
553 |
} |
554 |
|
555 |
=item $extractor->runtime_only |
556 |
|
557 |
This removes all files that are not needed at runtime, such as static |
558 |
archives, header and other files needed only for compilation of modules, |
559 |
and pod and html files (which are unlikely to be needed at runtime). |
560 |
|
561 |
This is quite useful when you want to have only fiels actually needed to |
562 |
execute a program. |
563 |
|
564 |
=cut |
565 |
|
566 |
sub runtime_only { |
567 |
my ($self) = @_; |
568 |
|
569 |
$self->_trace_flush; |
570 |
|
571 |
my $set = $self->{set}; |
572 |
|
573 |
# delete all static libraries |
574 |
delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set }; |
575 |
|
576 |
# delete all extralibs.ld and extralibs.all (no clue what the latter is for) |
577 |
delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set }; |
578 |
|
579 |
# delete all .pod, .h, .html files (hopefully none of them are used at runtime) |
580 |
delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set }; |
581 |
} |
582 |
|
583 |
=back |
584 |
|
585 |
=head1 ALGORITHMS |
586 |
#TODO |
587 |
|
588 |
=head2 Module/trace-based additions |
589 |
#TODO |
590 |
|
591 |
For example, when using L<Coro::AnyEvent> or |
592 |
L<AnyEvent::DNS> are added, then also all (relevant) files from the |
593 |
L<Coro> and L<AnyEvent> distributions will be included. |
594 |
|
595 |
The only exception is perl itself |
596 |
|
597 |
=head2 Glob/path-based modifications |
598 |
#TODO |
599 |
|
600 |
=head1 EXTENDED GLOB PATTERNS |
601 |
#TODO |
602 |
|
603 |
=head1 SEE ALSO |
604 |
|
605 |
The utility program that comes with this module: L<perl-libextract>. |
606 |
|
607 |
L<App::Staticperl>, L<Urlader>, L<Perl::Squish>. |
608 |
|
609 |
=head1 AUTHOR |
610 |
|
611 |
Marc Lehmann <schmorp@schmorp.de> |
612 |
http://home.schmorp.de/ |
613 |
|
614 |
=cut |
615 |
|
616 |
1; |
617 |
|