ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.4
Committed: Sat Jan 14 21:38:34 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.3: +35 -10 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set of
25 files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =over 4
33
34 =cut
35
36 package Perl::LibExtractor;
37
38 our $VERSION = '0.1';
39
40 use Config;
41 use File::Temp ();
42
43 use common::sense;
44
45 sub I_SRC () { 0 }
46 sub I_DEP () { 1 }
47
48 sub croak($) {
49 require Carp;
50 Carp::croak "(Perl::LibExtractor) $_[0]";
51 }
52
53 =item $extractor = new Perl::LibExtractor [key => value...]
54
55 Creates a new extractor object. Each extractor object stores some
56 configuration options and a subset of files that can be queried at any
57 time,.
58
59 The following key-value pairs exist, with default values as specified.
60
61 =over 4
62
63 =item exedir => "bin"
64
65 The prefix to use for the suggested target path for perl
66 executables. Defaults to F<bin>.
67
68 =item libdir => "lib"
69
70 The prefix to use for the suggested target path of perl library
71 files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
72 etc.). Defaults to F<lib>.
73
74 =item bindir => "bin"
75
76 The prefix to use for the suggested target path for (non-perl)
77 executables. Defaults to F<bin>.
78
79 =item dlldir => "bin"
80
81 The prefix to use for the suggested target path of any shared
82 libraries. Defaults to F<bin>.
83
84 =item inc => \@INC without "."
85
86 An arrayref with paths to perl library directories. The default is
87 C<\@INC>, with F<.> removed.
88
89 To prepend custom dirs just do this:
90
91 inc => ["mydir", @INC],
92
93 =item use_packlist => 1
94
95 Enable (if true) or disable the use of C<.packlist> files. If enabled,
96 then each time a module is included, the complete distribution that
97 contains it is included (and traced) as well. See L<ALGORITHMS>, below.
98
99 =back
100
101 =cut
102
103 sub new {
104 my ($class, %kv) = @_;
105
106 my $self = bless {
107 exedir => "bin",
108 libdir => "lib",
109 bindir => "bin",
110 dlldir => "bin",
111 inc => [grep $_ ne ".", @INC],
112 use_packlist => 1,
113 %kv,
114 set => {},
115 }, $class;
116
117 my %inc_seen;
118 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
119 $self->{inc} = \@inc;
120
121 $self->_set_inc;
122
123 $self
124 }
125
126 sub _perl_path() {
127 my $secure_perl_path = $Config{perlpath};
128
129 if ($^O ne 'VMS') {
130 $secure_perl_path .= $Config{_exe}
131 unless $secure_perl_path =~ m/$Config{_exe}$/i;
132 }
133
134 $secure_perl_path
135 }
136
137 sub _set_inc {
138 my ($self) = @_;
139
140 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
141 $matchprefix = qr<^(?:$matchprefix)/>;
142
143 my %lib;
144 my @packlists;
145
146 # find all files in all libdirs, earlier ones overwrite later ones
147 my @scan = map [$_, ""], @{ $self->{inc} };
148
149 while (@scan) {
150 my ($root, $dir) = @{ pop @scan };
151
152 my $pfx = length $dir ? "$dir/" : "";
153
154 for (do {
155 opendir my $fh, "$root/$dir"
156 or croak "$root/$dir: $!";
157 grep !/^\.\.?$/, readdir $fh
158 }) {
159 if (-d "$root/$dir/$_/.") {
160 $lib{"$pfx$_/"} = "$root/$pfx$_";
161 push @scan, [$root, "$pfx$_"];
162 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
163 push @packlists, [$root, $pfx];
164 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
165 # skip empty .bs files
166 # } elsif (/\.(?:pod|h|html)$/) {
167 # # not interested in those
168 } else {
169 #push @files, $_;
170 $lib{"$pfx$_"} = "$root/$pfx$_";
171 }
172 }
173
174 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
175 }
176
177 my %packlist;
178
179 # need to go forward here
180 for (@packlists) {
181 my ($root, $auto) = @$_;
182
183 my @packlist;
184
185 open my $fh, "<:perlio", "$root/$auto/.packlist"
186 or die "$root/$auto/.packlist: $!";
187
188 $root = qr<^\Q$root/>;
189
190 while (<$fh>) {
191 chomp;
192 s/ .*$//; # newer-style .packlists might contain key=value pairs
193 s%/\./%/%g; # yeah, these too
194
195 s/$root// or next;
196 exists $lib{$_} or next;
197
198 push @packlist, $_;
199 $packlist{$_} = \@packlist;
200 }
201 }
202
203 $self->{lib} = \%lib;
204 $self->{packlist} = \%packlist;
205 $self->{matchprefix} = $matchprefix;
206 }
207
208 sub _add {
209 my ($self, $add) = @_;
210
211 my $lib = $self->{lib};
212 my $path;
213
214 for (@$add) {
215 $path = "$self->{libdir}/$_";
216
217 $self->{set}{$path} ||= do {
218 my @info;
219
220 $info[I_SRC] = $lib->{$_}
221 or croak "$_: unable to locate file in perl library";
222
223 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
224 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
225 for @{ $self->{packlist}{$_} };
226
227 # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
228 # s/\.pm$//;
229 # s%/%::%g;
230 # my $pkg = "libextractor" . ++$self->{count};
231 # $self->add_eval ("{ package $pkg; eval 'use $_' }")
232 # unless $self->{_add_do}{$_}++;
233 # }
234 #
235 # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
236 # for grep /\.pl$/, @{ $self->{packlist}{$_} };
237
238 } elsif (/^(.*)\.pm$/) {
239 (my $auto = "auto/$1/") =~ s%::%/%g;
240 $auto =~ m%/([^/]+)/$% or die;
241 my $base = $1;
242
243 if (exists $lib->{$auto}) {
244 # auto dir exists, scan it for cool stuff
245
246 # 1. shared object, others are of no interest to us
247 my $so = "$auto$base.$Config{dlext}";
248 if (my $src = $lib->{$so}) {
249 $so = "$self->{libdir}/$so";
250 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
251 }
252
253 # 2. autoloader/autosplit
254 my $ix = "${auto}autosplit.ix";
255 if (my $src = $lib->{$ix}) {
256 $ix = "$self->{libdir}/$ix";
257 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
258
259 open my $fh, "<:perlio", $src
260 or croak "$src: $!";
261
262 my $package;
263
264 while (<$fh>) {
265 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
266 my $al = "auto/$package/$1.al";
267 my $src = $lib->{$al}
268 or croak "$al: autoload file not found, but should be there.";
269
270 $al = "$self->{libdir}/$al";
271 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
272
273 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
274 ($package = $1) =~ s/::/\//g;
275 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
276 # nop
277 } else {
278 warn "WARNING: $src: unparsable line, please report: $_";
279 }
280 }
281 }
282
283 skip:
284 }
285 }
286
287 \@info
288 };
289 }
290 }
291
292 sub _trace {
293 my ($self, $file, $eval) = @_;
294
295 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
296 }
297
298 sub _trace_flush {
299 my ($self) = @_;
300
301 # ->_add might add additional files to trace
302 while (exists $self->{trace_begin} or exists $self->{trace_check}) {
303 my $tmpdir = newdir File::Temp;
304 my $dir = $tmpdir->dirname;
305
306 open my $fh, ">:perlio", "$dir/eval"
307 or croak "$dir/eval: $!";
308 syswrite $fh,
309 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
310 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
311 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
312 . "CHECK {\n"
313 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
314 . 'print join "\x00", values %INC;'
315 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
316 . "}\n"
317 . (delete $self->{trace_check});
318 close $fh;
319
320 system _perl_path, "-c", "$dir/eval"
321 and croak "trace failure, check trace process output - caught";
322
323 my @inc = split /\x00/, do {
324 open my $fh, "<:perlio", "$dir/out"
325 or croak "$dir/out: $!";
326 local $/;
327 scalar readline $fh
328 };
329
330 my $matchprefix = $self->{matchprefix};
331
332 # remove the library directory prefix, hope for the best
333 s/$matchprefix//
334 or croak "$_: file outside any library directory"
335 for @inc;
336
337 $self->_add (\@inc);
338 }
339 }
340
341 =item $extractor->add_mod ($module[, $module...])
342
343 Adds the given module(s) to the file set - the module name must be specified
344 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
345
346 The program will be loaded with the default import list, any dependent
347 files, such as the shared object implementing xs functions, or autoload
348 files, will also be added.
349
350 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
351 from the distribution they are part of.
352
353 $extractor->add_mod ("Coro", "AnyEvent::AIO");
354
355 =cut
356
357 sub add_mod {
358 my $self = shift;
359
360 for (@_) {
361 my $pkg = "libextractor" . ++$self->{count};
362 $self->_trace ("use $_", "{ package $pkg; use $_ }")
363 unless $self->{add_mod}{$_}++;
364 }
365 }
366
367 =item $extractor->add_exe ($name[, $name...])
368
369 Adds the given (perl) program(s) to the file set, that is, a program
370 installed by some perl module, written in perl (an example would be the
371 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
372 distribution).
373
374 Example: add the deliantra client installed by the L<Deliantra::Client>
375 module.
376
377 $extractor->add_exe ("deliantra");
378
379 =cut
380
381 sub add_exe {
382 my $self = shift;
383
384 exe:
385 for my $exe (@_) {
386 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
387 if (open my $fh, "<:perlio", "$dir/$exe") {
388
389 my $file = do { local $/; readline $fh };
390
391 $self->_trace_flush if exists $self->{trace_check};
392 $self->{trace_check} = $file;
393
394 $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
395 next exe;
396 }
397 }
398
399 croak "add_exe ($exe): executable not found";
400 }
401 }
402
403 =item $extractor->add_eval ($string)
404
405 Evaluates the string and adds all modules that are loaded by it. For
406 example, this would add L<AnyEvent> and the default backend implementation
407 module and event loop module:
408
409 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
410
411 =cut
412
413 sub add_eval {
414 my ($self, $eval) = @_;
415
416 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
417 $self->_trace ($eval,
418 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
419 . "eval q\x00BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
420 );
421 }
422
423 =item $extractor->add_perl
424
425 Adds the perl binary itself to the file set, including the libperl dll, if
426 needed.
427
428 =cut
429
430 sub add_perl {
431 my ($self) = @_;
432
433 $self->{set}{"$self->{bindir}/perl$Config{_exe}"} = [_perl_path];
434
435 # on debian, we have the special case of a perl binary linked against
436 # a static libperl.a (which is not available), but the Config says to use
437 # a shared library, which is in the wrong directory, too (which breaks
438 # every other perl installation on the system - they are so stupid).
439
440 # that means we can't find the libperl.so, because dbeian actively breaks
441 # their perl install, and we don't need it. we work around this by silently
442 # not including the libperl if we cannot find it.
443
444 if ($Config{useshrplib} eq "true") {
445 if (my $libperl = $self->{lib}{"CORE/$Config{libperl}"}) {
446 $self->{set}{"$self->{dlldir}/$Config{libperl}"} = $libperl;
447 }
448 }
449 }
450
451 =item $extractor->add_core_support
452
453 Try to add modules and files needed to support commonly-used builtin
454 language features. For example to open a scalar for I/O you need the
455 L<PerlIO::scalar> module:
456
457 open $fh, "<", \$scalar
458
459 A number of regex and string features (e.g. C<ucfirst>) need some unicore
460 files, e.g.:
461
462 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
463
464 This call adds these files (simply by executing code similar to the above
465 code fragments).
466
467 Notable things that are missing are other PerlIO layers, such as
468 L<PerlIO::encoding>, and named character and character class matches.
469
470 =cut
471
472 sub add_core_support {
473 my ($self) = @_;
474
475 $self->add_eval ('my $v; open my $fh, "<", \$v');
476 $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
477 }
478
479 =item $extractor->add_unicore
480
481 #TODO
482
483 =item $extractor->add_glob ($modglob[, $modglob...])
484
485 #TODO#
486
487 =item $extractor->filter ($pattern[, $pattern...])
488
489 Applies a series of include/exclude filters. Each filter must start
490 with either C<+> or C<->, to designate the pattern as I<include> or
491 I<exclude> pattern. The rest of the pattern is an extended glob pattern
492 (see L<EXTENDED GLOB PATTERNS>).
493
494 Each pattern is instantly applied, and all matching files will be
495 permanently included or excluded, that is, if you have an include pattern
496 then all files that were matched by it will be included in the set,
497 regardless of any further exclude patterns matching the same files.
498
499 Likewise, any file excluded by a pattern will not be included in the set,
500 even if matches by later include patterns.
501
502 Any files not matched by any expression will be included, that is, the
503 filter list has an implicit C<+/**> pattern at the end.
504
505 =cut
506
507 sub _extglob2re {
508 my $self = shift;
509
510 for (quotemeta $_[0]) {
511 s/\\\*\\\*/.*/g;
512 s/\\\*/[^\/]*/g;
513 s/\\\?/[^\/]/g;
514
515 unless (s%^\\/%%) {
516 s%\\:\\:%/%g;
517 $_ = (quotemeta $self->{libdir}) . "/$_\\.pm";
518 }
519
520 $_ .= '$';
521 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
522
523 return qr<^$_>s
524 }
525 }
526
527 sub filter {
528 my ($self, @patterns) = @_;
529
530 $self->_trace_flush;
531
532 my $set = $self->{set};
533 my %include;
534
535 for my $pat (@patterns) {
536 $pat =~ s/^([+\-])//
537 or croak "$_: not a valid filter pattern (missing + or - prefix)";
538 my $inc = $1 eq "+";
539 $pat = $self->_extglob2re ($pat);
540
541 my @match = grep /$pat/, keys %$set;
542
543 if ($inc) {
544 @include{@match} = delete @$set{@match};
545 } else {
546 delete @$set{@{ $_->[I_DEP] }} # remove dependents
547 for delete @$set{@match};
548 }
549 }
550
551 my @include = keys %include;
552 @$set{@include} = delete @include{@include};
553 }
554
555 =item $extractor->runtime_only
556
557 This removes all files that are not needed at runtime, such as static
558 archives, header and other files needed only for compilation of modules,
559 and pod and html files (which are unlikely to be needed at runtime).
560
561 This is quite useful when you want to have only fiels actually needed to
562 execute a program.
563
564 =cut
565
566 sub runtime_only {
567 my ($self) = @_;
568
569 $self->_trace_flush;
570
571 my $set = $self->{set};
572
573 # delete all static libraries
574 delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set };
575
576 # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
577 delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
578
579 # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
580 delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set };
581 }
582
583 =back
584
585 =head1 ALGORITHMS
586 #TODO
587
588 =head2 Module/trace-based additions
589 #TODO
590
591 For example, when using L<Coro::AnyEvent> or
592 L<AnyEvent::DNS> are added, then also all (relevant) files from the
593 L<Coro> and L<AnyEvent> distributions will be included.
594
595 The only exception is perl itself
596
597 =head2 Glob/path-based modifications
598 #TODO
599
600 =head1 EXTENDED GLOB PATTERNS
601 #TODO
602
603 =head1 SEE ALSO
604
605 The utility program that comes with this module: L<perl-libextract>.
606
607 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
608
609 =head1 AUTHOR
610
611 Marc Lehmann <schmorp@schmorp.de>
612 http://home.schmorp.de/
613
614 =cut
615
616 1;
617