ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.4
Committed: Sat Jan 14 21:38:34 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.3: +35 -10 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.2 other methods as you want, to generate a set of files. Then query the set of
25     files and do whatever you want with them.
26    
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =over 4
33    
34     =cut
35    
36     package Perl::LibExtractor;
37    
38     our $VERSION = '0.1';
39    
40     use Config;
41     use File::Temp ();
42    
43     use common::sense;
44    
45     sub I_SRC () { 0 }
46     sub I_DEP () { 1 }
47    
48     sub croak($) {
49     require Carp;
50     Carp::croak "(Perl::LibExtractor) $_[0]";
51     }
52    
53     =item $extractor = new Perl::LibExtractor [key => value...]
54    
55     Creates a new extractor object. Each extractor object stores some
56     configuration options and a subset of files that can be queried at any
57     time,.
58    
59     The following key-value pairs exist, with default values as specified.
60    
61     =over 4
62    
63     =item exedir => "bin"
64    
65     The prefix to use for the suggested target path for perl
66     executables. Defaults to F<bin>.
67    
68     =item libdir => "lib"
69    
70     The prefix to use for the suggested target path of perl library
71     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
72     etc.). Defaults to F<lib>.
73    
74     =item bindir => "bin"
75    
76     The prefix to use for the suggested target path for (non-perl)
77     executables. Defaults to F<bin>.
78    
79     =item dlldir => "bin"
80    
81     The prefix to use for the suggested target path of any shared
82     libraries. Defaults to F<bin>.
83    
84     =item inc => \@INC without "."
85    
86     An arrayref with paths to perl library directories. The default is
87     C<\@INC>, with F<.> removed.
88    
89     To prepend custom dirs just do this:
90    
91     inc => ["mydir", @INC],
92    
93 root 1.3 =item use_packlist => 1
94 root 1.1
95 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
96     then each time a module is included, the complete distribution that
97     contains it is included (and traced) as well. See L<ALGORITHMS>, below.
98 root 1.1
99     =back
100    
101     =cut
102    
103     sub new {
104     my ($class, %kv) = @_;
105    
106     my $self = bless {
107     exedir => "bin",
108     libdir => "lib",
109     bindir => "bin",
110     dlldir => "bin",
111     inc => [grep $_ ne ".", @INC],
112 root 1.3 use_packlist => 1,
113 root 1.1 %kv,
114     set => {},
115     }, $class;
116    
117     my %inc_seen;
118 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
119 root 1.1 $self->{inc} = \@inc;
120    
121     $self->_set_inc;
122    
123     $self
124     }
125    
126 root 1.4 sub _perl_path() {
127     my $secure_perl_path = $Config{perlpath};
128    
129     if ($^O ne 'VMS') {
130     $secure_perl_path .= $Config{_exe}
131     unless $secure_perl_path =~ m/$Config{_exe}$/i;
132     }
133    
134     $secure_perl_path
135     }
136    
137 root 1.1 sub _set_inc {
138     my ($self) = @_;
139    
140 root 1.2 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
141     $matchprefix = qr<^(?:$matchprefix)/>;
142    
143 root 1.1 my %lib;
144 root 1.2 my @packlists;
145 root 1.1
146     # find all files in all libdirs, earlier ones overwrite later ones
147 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
148    
149     while (@scan) {
150     my ($root, $dir) = @{ pop @scan };
151    
152     my $pfx = length $dir ? "$dir/" : "";
153    
154     for (do {
155     opendir my $fh, "$root/$dir"
156     or croak "$root/$dir: $!";
157     grep !/^\.\.?$/, readdir $fh
158     }) {
159     if (-d "$root/$dir/$_/.") {
160     $lib{"$pfx$_/"} = "$root/$pfx$_";
161     push @scan, [$root, "$pfx$_"];
162     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
163     push @packlists, [$root, $pfx];
164     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
165     # skip empty .bs files
166     # } elsif (/\.(?:pod|h|html)$/) {
167     # # not interested in those
168     } else {
169     #push @files, $_;
170     $lib{"$pfx$_"} = "$root/$pfx$_";
171 root 1.1 }
172 root 1.2 }
173    
174     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
175     }
176    
177     my %packlist;
178 root 1.1
179 root 1.2 # need to go forward here
180     for (@packlists) {
181     my ($root, $auto) = @$_;
182    
183     my @packlist;
184    
185     open my $fh, "<:perlio", "$root/$auto/.packlist"
186     or die "$root/$auto/.packlist: $!";
187    
188     $root = qr<^\Q$root/>;
189    
190     while (<$fh>) {
191     chomp;
192     s/ .*$//; # newer-style .packlists might contain key=value pairs
193     s%/\./%/%g; # yeah, these too
194    
195     s/$root// or next;
196     exists $lib{$_} or next;
197 root 1.1
198 root 1.2 push @packlist, $_;
199     $packlist{$_} = \@packlist;
200     }
201 root 1.1 }
202    
203     $self->{lib} = \%lib;
204 root 1.2 $self->{packlist} = \%packlist;
205     $self->{matchprefix} = $matchprefix;
206 root 1.1 }
207    
208     sub _add {
209     my ($self, $add) = @_;
210    
211     my $lib = $self->{lib};
212     my $path;
213    
214     for (@$add) {
215     $path = "$self->{libdir}/$_";
216    
217     $self->{set}{$path} ||= do {
218     my @info;
219    
220     $info[I_SRC] = $lib->{$_}
221     or croak "$_: unable to locate file in perl library";
222    
223 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
224 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
225     for @{ $self->{packlist}{$_} };
226 root 1.3
227     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
228     # s/\.pm$//;
229     # s%/%::%g;
230     # my $pkg = "libextractor" . ++$self->{count};
231     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
232     # unless $self->{_add_do}{$_}++;
233     # }
234     #
235     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
236     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
237    
238 root 1.2 } elsif (/^(.*)\.pm$/) {
239 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
240     $auto =~ m%/([^/]+)/$% or die;
241     my $base = $1;
242    
243     if (exists $lib->{$auto}) {
244     # auto dir exists, scan it for cool stuff
245    
246     # 1. shared object, others are of no interest to us
247     my $so = "$auto$base.$Config{dlext}";
248     if (my $src = $lib->{$so}) {
249     $so = "$self->{libdir}/$so";
250     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
251     }
252    
253     # 2. autoloader/autosplit
254     my $ix = "${auto}autosplit.ix";
255     if (my $src = $lib->{$ix}) {
256     $ix = "$self->{libdir}/$ix";
257     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
258    
259     open my $fh, "<:perlio", $src
260     or croak "$src: $!";
261    
262     my $package;
263    
264     while (<$fh>) {
265     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
266     my $al = "auto/$package/$1.al";
267     my $src = $lib->{$al}
268     or croak "$al: autoload file not found, but should be there.";
269    
270     $al = "$self->{libdir}/$al";
271     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
272    
273     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
274     ($package = $1) =~ s/::/\//g;
275     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
276     # nop
277     } else {
278     warn "WARNING: $src: unparsable line, please report: $_";
279     }
280     }
281     }
282    
283     skip:
284     }
285     }
286    
287     \@info
288     };
289     }
290     }
291    
292     sub _trace {
293     my ($self, $file, $eval) = @_;
294    
295 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
296 root 1.1 }
297    
298     sub _trace_flush {
299     my ($self) = @_;
300    
301 root 1.3 # ->_add might add additional files to trace
302     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
303     my $tmpdir = newdir File::Temp;
304     my $dir = $tmpdir->dirname;
305    
306     open my $fh, ">:perlio", "$dir/eval"
307     or croak "$dir/eval: $!";
308     syswrite $fh,
309     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
310     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
311     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
312     . "CHECK {\n"
313     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
314     . 'print join "\x00", values %INC;'
315     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
316     . "}\n"
317     . (delete $self->{trace_check});
318     close $fh;
319    
320 root 1.4 system _perl_path, "-c", "$dir/eval"
321 root 1.3 and croak "trace failure, check trace process output - caught";
322 root 1.1
323 root 1.3 my @inc = split /\x00/, do {
324     open my $fh, "<:perlio", "$dir/out"
325     or croak "$dir/out: $!";
326     local $/;
327     scalar readline $fh
328     };
329 root 1.1
330 root 1.3 my $matchprefix = $self->{matchprefix};
331 root 1.1
332 root 1.3 # remove the library directory prefix, hope for the best
333     s/$matchprefix//
334     or croak "$_: file outside any library directory"
335     for @inc;
336 root 1.1
337 root 1.3 $self->_add (\@inc);
338     }
339 root 1.1 }
340    
341     =item $extractor->add_mod ($module[, $module...])
342    
343     Adds the given module(s) to the file set - the module name must be specified
344     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
345    
346     The program will be loaded with the default import list, any dependent
347     files, such as the shared object implementing xs functions, or autoload
348     files, will also be added.
349    
350     Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
351     from the distribution they are part of.
352    
353     $extractor->add_mod ("Coro", "AnyEvent::AIO");
354    
355     =cut
356    
357     sub add_mod {
358     my $self = shift;
359    
360 root 1.3 for (@_) {
361     my $pkg = "libextractor" . ++$self->{count};
362     $self->_trace ("use $_", "{ package $pkg; use $_ }")
363     unless $self->{add_mod}{$_}++;
364     }
365 root 1.1 }
366    
367     =item $extractor->add_exe ($name[, $name...])
368    
369     Adds the given (perl) program(s) to the file set, that is, a program
370     installed by some perl module, written in perl (an example would be the
371     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
372     distribution).
373    
374     Example: add the deliantra client installed by the L<Deliantra::Client>
375     module.
376    
377     $extractor->add_exe ("deliantra");
378    
379     =cut
380    
381     sub add_exe {
382     my $self = shift;
383    
384     exe:
385     for my $exe (@_) {
386     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
387     if (open my $fh, "<:perlio", "$dir/$exe") {
388    
389     my $file = do { local $/; readline $fh };
390    
391     $self->_trace_flush if exists $self->{trace_check};
392     $self->{trace_check} = $file;
393    
394     $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
395     next exe;
396     }
397     }
398    
399     croak "add_exe ($exe): executable not found";
400     }
401     }
402    
403     =item $extractor->add_eval ($string)
404    
405     Evaluates the string and adds all modules that are loaded by it. For
406     example, this would add L<AnyEvent> and the default backend implementation
407     module and event loop module:
408    
409     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
410    
411     =cut
412    
413     sub add_eval {
414     my ($self, $eval) = @_;
415    
416     $eval =~ s/\x00/\x00."\\x00".q\x00/g;
417 root 1.3 $self->_trace ($eval,
418     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
419     . "eval q\x00BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
420     );
421 root 1.1 }
422    
423     =item $extractor->add_perl
424    
425     Adds the perl binary itself to the file set, including the libperl dll, if
426     needed.
427    
428 root 1.4 =cut
429    
430     sub add_perl {
431     my ($self) = @_;
432    
433     $self->{set}{"$self->{bindir}/perl$Config{_exe}"} = [_perl_path];
434    
435     # on debian, we have the special case of a perl binary linked against
436     # a static libperl.a (which is not available), but the Config says to use
437     # a shared library, which is in the wrong directory, too (which breaks
438     # every other perl installation on the system - they are so stupid).
439    
440     # that means we can't find the libperl.so, because dbeian actively breaks
441     # their perl install, and we don't need it. we work around this by silently
442     # not including the libperl if we cannot find it.
443    
444     if ($Config{useshrplib} eq "true") {
445     if (my $libperl = $self->{lib}{"CORE/$Config{libperl}"}) {
446     $self->{set}{"$self->{dlldir}/$Config{libperl}"} = $libperl;
447     }
448     }
449     }
450 root 1.3
451     =item $extractor->add_core_support
452    
453     Try to add modules and files needed to support commonly-used builtin
454     language features. For example to open a scalar for I/O you need the
455     L<PerlIO::scalar> module:
456    
457     open $fh, "<", \$scalar
458    
459     A number of regex and string features (e.g. C<ucfirst>) need some unicore
460     files, e.g.:
461    
462     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
463    
464     This call adds these files (simply by executing code similar to the above
465     code fragments).
466    
467     Notable things that are missing are other PerlIO layers, such as
468     L<PerlIO::encoding>, and named character and character class matches.
469    
470     =cut
471    
472     sub add_core_support {
473     my ($self) = @_;
474    
475     $self->add_eval ('my $v; open my $fh, "<", \$v');
476     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
477     }
478 root 1.1
479 root 1.3 =item $extractor->add_unicore
480    
481     #TODO
482 root 1.1
483     =item $extractor->add_glob ($modglob[, $modglob...])
484    
485     #TODO#
486    
487     =item $extractor->filter ($pattern[, $pattern...])
488    
489     Applies a series of include/exclude filters. Each filter must start
490     with either C<+> or C<->, to designate the pattern as I<include> or
491     I<exclude> pattern. The rest of the pattern is an extended glob pattern
492     (see L<EXTENDED GLOB PATTERNS>).
493    
494     Each pattern is instantly applied, and all matching files will be
495     permanently included or excluded, that is, if you have an include pattern
496     then all files that were matched by it will be included in the set,
497     regardless of any further exclude patterns matching the same files.
498    
499     Likewise, any file excluded by a pattern will not be included in the set,
500     even if matches by later include patterns.
501    
502     Any files not matched by any expression will be included, that is, the
503     filter list has an implicit C<+/**> pattern at the end.
504    
505 root 1.2 =cut
506    
507     sub _extglob2re {
508     my $self = shift;
509    
510     for (quotemeta $_[0]) {
511     s/\\\*\\\*/.*/g;
512     s/\\\*/[^\/]*/g;
513     s/\\\?/[^\/]/g;
514    
515     unless (s%^\\/%%) {
516     s%\\:\\:%/%g;
517     $_ = (quotemeta $self->{libdir}) . "/$_\\.pm";
518     }
519    
520     $_ .= '$';
521     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
522    
523     return qr<^$_>s
524     }
525     }
526    
527     sub filter {
528     my ($self, @patterns) = @_;
529    
530 root 1.3 $self->_trace_flush;
531    
532     my $set = $self->{set};
533     my %include;
534 root 1.2
535     for my $pat (@patterns) {
536     $pat =~ s/^([+\-])//
537     or croak "$_: not a valid filter pattern (missing + or - prefix)";
538     my $inc = $1 eq "+";
539     $pat = $self->_extglob2re ($pat);
540 root 1.3
541     my @match = grep /$pat/, keys %$set;
542    
543     if ($inc) {
544     @include{@match} = delete @$set{@match};
545     } else {
546     delete @$set{@{ $_->[I_DEP] }} # remove dependents
547     for delete @$set{@match};
548     }
549 root 1.2 }
550 root 1.3
551     my @include = keys %include;
552     @$set{@include} = delete @include{@include};
553 root 1.2 }
554 root 1.1
555 root 1.3 =item $extractor->runtime_only
556    
557     This removes all files that are not needed at runtime, such as static
558     archives, header and other files needed only for compilation of modules,
559     and pod and html files (which are unlikely to be needed at runtime).
560 root 1.1
561 root 1.3 This is quite useful when you want to have only fiels actually needed to
562     execute a program.
563    
564     =cut
565    
566     sub runtime_only {
567     my ($self) = @_;
568    
569     $self->_trace_flush;
570    
571     my $set = $self->{set};
572    
573     # delete all static libraries
574     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set };
575    
576     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
577     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
578    
579     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
580     delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set };
581     }
582 root 1.1
583     =back
584    
585     =head1 ALGORITHMS
586     #TODO
587    
588     =head2 Module/trace-based additions
589     #TODO
590    
591     For example, when using L<Coro::AnyEvent> or
592     L<AnyEvent::DNS> are added, then also all (relevant) files from the
593     L<Coro> and L<AnyEvent> distributions will be included.
594    
595     The only exception is perl itself
596    
597     =head2 Glob/path-based modifications
598     #TODO
599    
600     =head1 EXTENDED GLOB PATTERNS
601 root 1.2 #TODO
602 root 1.1
603     =head1 SEE ALSO
604    
605     The utility program that comes with this module: L<perl-libextract>.
606    
607     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
608    
609     =head1 AUTHOR
610    
611     Marc Lehmann <schmorp@schmorp.de>
612     http://home.schmorp.de/
613    
614     =cut
615    
616     1;
617