ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.3
Committed: Sat Jan 14 21:14:36 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.2: +142 -60 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.2 other methods as you want, to generate a set of files. Then query the set of
25     files and do whatever you want with them.
26    
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =over 4
33    
34     =cut
35    
36     package Perl::LibExtractor;
37    
38     our $VERSION = '0.1';
39    
40     use Config;
41     use File::Temp ();
42    
43     use common::sense;
44    
45     sub I_SRC () { 0 }
46     sub I_DEP () { 1 }
47    
48     sub croak($) {
49     require Carp;
50     Carp::croak "(Perl::LibExtractor) $_[0]";
51     }
52    
53     =item $extractor = new Perl::LibExtractor [key => value...]
54    
55     Creates a new extractor object. Each extractor object stores some
56     configuration options and a subset of files that can be queried at any
57     time,.
58    
59     The following key-value pairs exist, with default values as specified.
60    
61     =over 4
62    
63     =item exedir => "bin"
64    
65     The prefix to use for the suggested target path for perl
66     executables. Defaults to F<bin>.
67    
68     =item libdir => "lib"
69    
70     The prefix to use for the suggested target path of perl library
71     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
72     etc.). Defaults to F<lib>.
73    
74     =item bindir => "bin"
75    
76     The prefix to use for the suggested target path for (non-perl)
77     executables. Defaults to F<bin>.
78    
79     =item dlldir => "bin"
80    
81     The prefix to use for the suggested target path of any shared
82     libraries. Defaults to F<bin>.
83    
84     =item inc => \@INC without "."
85    
86     An arrayref with paths to perl library directories. The default is
87     C<\@INC>, with F<.> removed.
88    
89     To prepend custom dirs just do this:
90    
91     inc => ["mydir", @INC],
92    
93 root 1.3 =item use_packlist => 1
94 root 1.1
95 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
96     then each time a module is included, the complete distribution that
97     contains it is included (and traced) as well. See L<ALGORITHMS>, below.
98 root 1.1
99     =back
100    
101     =cut
102    
103     sub new {
104     my ($class, %kv) = @_;
105    
106     my $self = bless {
107     exedir => "bin",
108     libdir => "lib",
109     bindir => "bin",
110     dlldir => "bin",
111     inc => [grep $_ ne ".", @INC],
112 root 1.3 use_packlist => 1,
113 root 1.1 %kv,
114     set => {},
115     }, $class;
116    
117     my %inc_seen;
118     my @inc = grep !$inc_seen{$_}++, @{ $self->{inc} };
119     $self->{inc} = \@inc;
120    
121     $self->_set_inc;
122    
123     $self
124     }
125    
126     sub _set_inc {
127     my ($self) = @_;
128    
129 root 1.2 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
130     $matchprefix = qr<^(?:$matchprefix)/>;
131    
132 root 1.1 my %lib;
133 root 1.2 my @packlists;
134 root 1.1
135     # find all files in all libdirs, earlier ones overwrite later ones
136 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
137    
138     while (@scan) {
139     my ($root, $dir) = @{ pop @scan };
140    
141     my $pfx = length $dir ? "$dir/" : "";
142    
143     for (do {
144     opendir my $fh, "$root/$dir"
145     or croak "$root/$dir: $!";
146     grep !/^\.\.?$/, readdir $fh
147     }) {
148     if (-d "$root/$dir/$_/.") {
149     $lib{"$pfx$_/"} = "$root/$pfx$_";
150     push @scan, [$root, "$pfx$_"];
151     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
152     push @packlists, [$root, $pfx];
153     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
154     # skip empty .bs files
155     # } elsif (/\.(?:pod|h|html)$/) {
156     # # not interested in those
157     } else {
158     #push @files, $_;
159     $lib{"$pfx$_"} = "$root/$pfx$_";
160 root 1.1 }
161 root 1.2 }
162    
163     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
164     }
165    
166     my %packlist;
167 root 1.1
168 root 1.2 # need to go forward here
169     for (@packlists) {
170     my ($root, $auto) = @$_;
171    
172     my @packlist;
173    
174     open my $fh, "<:perlio", "$root/$auto/.packlist"
175     or die "$root/$auto/.packlist: $!";
176    
177     $root = qr<^\Q$root/>;
178    
179     while (<$fh>) {
180     chomp;
181     s/ .*$//; # newer-style .packlists might contain key=value pairs
182     s%/\./%/%g; # yeah, these too
183    
184     s/$root// or next;
185     exists $lib{$_} or next;
186 root 1.1
187 root 1.2 push @packlist, $_;
188     $packlist{$_} = \@packlist;
189     }
190 root 1.1 }
191    
192     $self->{lib} = \%lib;
193 root 1.2 $self->{packlist} = \%packlist;
194     $self->{matchprefix} = $matchprefix;
195 root 1.1 }
196    
197     sub _add {
198     my ($self, $add) = @_;
199    
200     my $lib = $self->{lib};
201     my $path;
202    
203     for (@$add) {
204     $path = "$self->{libdir}/$_";
205    
206     $self->{set}{$path} ||= do {
207     my @info;
208    
209     $info[I_SRC] = $lib->{$_}
210     or croak "$_: unable to locate file in perl library";
211    
212 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
213 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
214     for @{ $self->{packlist}{$_} };
215 root 1.3
216     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
217     # s/\.pm$//;
218     # s%/%::%g;
219     # my $pkg = "libextractor" . ++$self->{count};
220     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
221     # unless $self->{_add_do}{$_}++;
222     # }
223     #
224     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
225     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
226    
227 root 1.2 } elsif (/^(.*)\.pm$/) {
228 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
229     $auto =~ m%/([^/]+)/$% or die;
230     my $base = $1;
231    
232     if (exists $lib->{$auto}) {
233     # auto dir exists, scan it for cool stuff
234    
235     # 1. shared object, others are of no interest to us
236     my $so = "$auto$base.$Config{dlext}";
237     if (my $src = $lib->{$so}) {
238     $so = "$self->{libdir}/$so";
239     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
240     }
241    
242     # 2. autoloader/autosplit
243     my $ix = "${auto}autosplit.ix";
244     if (my $src = $lib->{$ix}) {
245     $ix = "$self->{libdir}/$ix";
246     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
247    
248     open my $fh, "<:perlio", $src
249     or croak "$src: $!";
250    
251     my $package;
252    
253     while (<$fh>) {
254     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
255     my $al = "auto/$package/$1.al";
256     my $src = $lib->{$al}
257     or croak "$al: autoload file not found, but should be there.";
258    
259     $al = "$self->{libdir}/$al";
260     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
261    
262     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
263     ($package = $1) =~ s/::/\//g;
264     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
265     # nop
266     } else {
267     warn "WARNING: $src: unparsable line, please report: $_";
268     }
269     }
270     }
271    
272     skip:
273     }
274     }
275    
276     \@info
277     };
278     }
279     }
280    
281     sub _trace {
282     my ($self, $file, $eval) = @_;
283    
284 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
285 root 1.1 }
286    
287     sub _trace_flush {
288     my ($self) = @_;
289    
290 root 1.3 # ->_add might add additional files to trace
291     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
292     my $tmpdir = newdir File::Temp;
293     my $dir = $tmpdir->dirname;
294    
295     open my $fh, ">:perlio", "$dir/eval"
296     or croak "$dir/eval: $!";
297     syswrite $fh,
298     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
299     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
300     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
301     . "CHECK {\n"
302     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
303     . 'print join "\x00", values %INC;'
304     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
305     . "}\n"
306     . (delete $self->{trace_check});
307     close $fh;
308    
309     my $secure_perl_path = $Config{perlpath};
310    
311     if ($^O ne 'VMS') {
312     $secure_perl_path .= $Config{_exe}
313     unless $secure_perl_path =~ m/$Config{_exe}$/i;
314     }
315 root 1.1
316 root 1.3 system $secure_perl_path, "-c", "$dir/eval"
317     and croak "trace failure, check trace process output - caught";
318 root 1.1
319 root 1.3 my @inc = split /\x00/, do {
320     open my $fh, "<:perlio", "$dir/out"
321     or croak "$dir/out: $!";
322     local $/;
323     scalar readline $fh
324     };
325 root 1.1
326 root 1.3 my $matchprefix = $self->{matchprefix};
327 root 1.1
328 root 1.3 # remove the library directory prefix, hope for the best
329     s/$matchprefix//
330     or croak "$_: file outside any library directory"
331     for @inc;
332 root 1.1
333 root 1.3 $self->_add (\@inc);
334     }
335 root 1.1 }
336    
337     =item $extractor->add_mod ($module[, $module...])
338    
339     Adds the given module(s) to the file set - the module name must be specified
340     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
341    
342     The program will be loaded with the default import list, any dependent
343     files, such as the shared object implementing xs functions, or autoload
344     files, will also be added.
345    
346     Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
347     from the distribution they are part of.
348    
349     $extractor->add_mod ("Coro", "AnyEvent::AIO");
350    
351     =cut
352    
353     sub add_mod {
354     my $self = shift;
355    
356 root 1.3 for (@_) {
357     my $pkg = "libextractor" . ++$self->{count};
358     $self->_trace ("use $_", "{ package $pkg; use $_ }")
359     unless $self->{add_mod}{$_}++;
360     }
361 root 1.1 }
362    
363     =item $extractor->add_exe ($name[, $name...])
364    
365     Adds the given (perl) program(s) to the file set, that is, a program
366     installed by some perl module, written in perl (an example would be the
367     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
368     distribution).
369    
370     Example: add the deliantra client installed by the L<Deliantra::Client>
371     module.
372    
373     $extractor->add_exe ("deliantra");
374    
375     =cut
376    
377     sub add_exe {
378     my $self = shift;
379    
380     exe:
381     for my $exe (@_) {
382     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
383     if (open my $fh, "<:perlio", "$dir/$exe") {
384    
385     my $file = do { local $/; readline $fh };
386    
387     $self->_trace_flush if exists $self->{trace_check};
388     $self->{trace_check} = $file;
389    
390     $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
391     next exe;
392     }
393     }
394    
395     croak "add_exe ($exe): executable not found";
396     }
397     }
398    
399     =item $extractor->add_eval ($string)
400    
401     Evaluates the string and adds all modules that are loaded by it. For
402     example, this would add L<AnyEvent> and the default backend implementation
403     module and event loop module:
404    
405     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
406    
407     =cut
408    
409     sub add_eval {
410     my ($self, $eval) = @_;
411    
412     $eval =~ s/\x00/\x00."\\x00".q\x00/g;
413 root 1.3 $self->_trace ($eval,
414     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
415     . "eval q\x00BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
416     );
417 root 1.1 }
418    
419     =item $extractor->add_perl
420    
421     Adds the perl binary itself to the file set, including the libperl dll, if
422     needed.
423    
424 root 1.3 #TODO#
425    
426     =item $extractor->add_core_support
427    
428     Try to add modules and files needed to support commonly-used builtin
429     language features. For example to open a scalar for I/O you need the
430     L<PerlIO::scalar> module:
431    
432     open $fh, "<", \$scalar
433    
434     A number of regex and string features (e.g. C<ucfirst>) need some unicore
435     files, e.g.:
436    
437     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
438    
439     This call adds these files (simply by executing code similar to the above
440     code fragments).
441    
442     Notable things that are missing are other PerlIO layers, such as
443     L<PerlIO::encoding>, and named character and character class matches.
444    
445     =cut
446    
447     sub add_core_support {
448     my ($self) = @_;
449    
450     $self->add_eval ('my $v; open my $fh, "<", \$v');
451     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
452     }
453 root 1.1
454 root 1.3 =item $extractor->add_unicore
455    
456     #TODO
457 root 1.1
458     =item $extractor->add_glob ($modglob[, $modglob...])
459    
460     #TODO#
461    
462     =item $extractor->filter ($pattern[, $pattern...])
463    
464     Applies a series of include/exclude filters. Each filter must start
465     with either C<+> or C<->, to designate the pattern as I<include> or
466     I<exclude> pattern. The rest of the pattern is an extended glob pattern
467     (see L<EXTENDED GLOB PATTERNS>).
468    
469     Each pattern is instantly applied, and all matching files will be
470     permanently included or excluded, that is, if you have an include pattern
471     then all files that were matched by it will be included in the set,
472     regardless of any further exclude patterns matching the same files.
473    
474     Likewise, any file excluded by a pattern will not be included in the set,
475     even if matches by later include patterns.
476    
477     Any files not matched by any expression will be included, that is, the
478     filter list has an implicit C<+/**> pattern at the end.
479    
480 root 1.2 =cut
481    
482     sub _extglob2re {
483     my $self = shift;
484    
485     for (quotemeta $_[0]) {
486     s/\\\*\\\*/.*/g;
487     s/\\\*/[^\/]*/g;
488     s/\\\?/[^\/]/g;
489    
490     unless (s%^\\/%%) {
491     s%\\:\\:%/%g;
492     $_ = (quotemeta $self->{libdir}) . "/$_\\.pm";
493     }
494    
495     $_ .= '$';
496     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
497    
498     return qr<^$_>s
499     }
500     }
501    
502     sub filter {
503     my ($self, @patterns) = @_;
504    
505 root 1.3 $self->_trace_flush;
506    
507     my $set = $self->{set};
508     my %include;
509 root 1.2
510     for my $pat (@patterns) {
511     $pat =~ s/^([+\-])//
512     or croak "$_: not a valid filter pattern (missing + or - prefix)";
513     my $inc = $1 eq "+";
514     $pat = $self->_extglob2re ($pat);
515 root 1.3
516     my @match = grep /$pat/, keys %$set;
517    
518     if ($inc) {
519     @include{@match} = delete @$set{@match};
520     } else {
521     delete @$set{@{ $_->[I_DEP] }} # remove dependents
522     for delete @$set{@match};
523     }
524 root 1.2 }
525 root 1.3
526     my @include = keys %include;
527     @$set{@include} = delete @include{@include};
528 root 1.2 }
529 root 1.1
530 root 1.3 =item $extractor->runtime_only
531    
532     This removes all files that are not needed at runtime, such as static
533     archives, header and other files needed only for compilation of modules,
534     and pod and html files (which are unlikely to be needed at runtime).
535 root 1.1
536 root 1.3 This is quite useful when you want to have only fiels actually needed to
537     execute a program.
538    
539     =cut
540    
541     sub runtime_only {
542     my ($self) = @_;
543    
544     $self->_trace_flush;
545    
546     my $set = $self->{set};
547    
548     # delete all static libraries
549     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set };
550    
551     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
552     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
553    
554     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
555     delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set };
556     }
557 root 1.1
558     =back
559    
560     =head1 ALGORITHMS
561     #TODO
562    
563     =head2 Module/trace-based additions
564     #TODO
565    
566     For example, when using L<Coro::AnyEvent> or
567     L<AnyEvent::DNS> are added, then also all (relevant) files from the
568     L<Coro> and L<AnyEvent> distributions will be included.
569    
570     The only exception is perl itself
571    
572     =head2 Glob/path-based modifications
573     #TODO
574    
575     =head1 EXTENDED GLOB PATTERNS
576 root 1.2 #TODO
577 root 1.1
578     =head1 SEE ALSO
579    
580     The utility program that comes with this module: L<perl-libextract>.
581    
582     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
583    
584     =head1 AUTHOR
585    
586     Marc Lehmann <schmorp@schmorp.de>
587     http://home.schmorp.de/
588    
589     =cut
590    
591     1;
592