ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.2
Committed: Sat Jan 14 18:42:53 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.1: +108 -66 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.2 other methods as you want, to generate a set of files. Then query the set of
25     files and do whatever you want with them.
26    
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =over 4
33    
34     =cut
35    
36     package Perl::LibExtractor;
37    
38     our $VERSION = '0.1';
39    
40     use Config;
41     use File::Temp ();
42    
43     use common::sense;
44    
45     sub I_SRC () { 0 }
46     sub I_DEP () { 1 }
47    
48     sub croak($) {
49     require Carp;
50     Carp::croak "(Perl::LibExtractor) $_[0]";
51     }
52    
53     =item $extractor = new Perl::LibExtractor [key => value...]
54    
55     Creates a new extractor object. Each extractor object stores some
56     configuration options and a subset of files that can be queried at any
57     time,.
58    
59     The following key-value pairs exist, with default values as specified.
60    
61     =over 4
62    
63     =item exedir => "bin"
64    
65     The prefix to use for the suggested target path for perl
66     executables. Defaults to F<bin>.
67    
68     =item libdir => "lib"
69    
70     The prefix to use for the suggested target path of perl library
71     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
72     etc.). Defaults to F<lib>.
73    
74     =item bindir => "bin"
75    
76     The prefix to use for the suggested target path for (non-perl)
77     executables. Defaults to F<bin>.
78    
79     =item dlldir => "bin"
80    
81     The prefix to use for the suggested target path of any shared
82     libraries. Defaults to F<bin>.
83    
84     =item inc => \@INC without "."
85    
86     An arrayref with paths to perl library directories. The default is
87     C<\@INC>, with F<.> removed.
88    
89     To prepend custom dirs just do this:
90    
91     inc => ["mydir", @INC],
92    
93     =item use_packlists => 1
94    
95     Enable (if true) or disable the use of C<.packlists>. If enabled, then
96     each time a module is included, the complete distribution that contains
97     it is included as well. See L<ALGORITHMS>, below.
98    
99     =back
100    
101     =cut
102    
103     sub new {
104     my ($class, %kv) = @_;
105    
106     my $self = bless {
107     exedir => "bin",
108     libdir => "lib",
109     bindir => "bin",
110     dlldir => "bin",
111     inc => [grep $_ ne ".", @INC],
112     use_packlists => 1,
113     %kv,
114     set => {},
115     }, $class;
116    
117     my %inc_seen;
118     my @inc = grep !$inc_seen{$_}++, @{ $self->{inc} };
119     $self->{inc} = \@inc;
120    
121     $self->_set_inc;
122    
123     $self
124     }
125    
126     sub _set_inc {
127     my ($self) = @_;
128    
129 root 1.2 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
130     $matchprefix = qr<^(?:$matchprefix)/>;
131    
132 root 1.1 my %lib;
133 root 1.2 my @packlists;
134 root 1.1
135     # find all files in all libdirs, earlier ones overwrite later ones
136 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
137    
138     while (@scan) {
139     my ($root, $dir) = @{ pop @scan };
140    
141     my $pfx = length $dir ? "$dir/" : "";
142    
143     for (do {
144     opendir my $fh, "$root/$dir"
145     or croak "$root/$dir: $!";
146     grep !/^\.\.?$/, readdir $fh
147     }) {
148     if (-d "$root/$dir/$_/.") {
149     $lib{"$pfx$_/"} = "$root/$pfx$_";
150     push @scan, [$root, "$pfx$_"];
151     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
152     push @packlists, [$root, $pfx];
153     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
154     # skip empty .bs files
155     # } elsif (/\.(?:pod|h|html)$/) {
156     # # not interested in those
157     } else {
158     #push @files, $_;
159     $lib{"$pfx$_"} = "$root/$pfx$_";
160 root 1.1 }
161 root 1.2 }
162    
163     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
164     }
165    
166     my %packlist;
167 root 1.1
168 root 1.2 # need to go forward here
169     for (@packlists) {
170     my ($root, $auto) = @$_;
171    
172     my @packlist;
173    
174     open my $fh, "<:perlio", "$root/$auto/.packlist"
175     or die "$root/$auto/.packlist: $!";
176    
177     $root = qr<^\Q$root/>;
178    
179     while (<$fh>) {
180     chomp;
181     s/ .*$//; # newer-style .packlists might contain key=value pairs
182     s%/\./%/%g; # yeah, these too
183    
184     s/$root// or next;
185     exists $lib{$_} or next;
186 root 1.1
187 root 1.2 push @packlist, $_;
188     $packlist{$_} = \@packlist;
189     }
190 root 1.1 }
191    
192     $self->{lib} = \%lib;
193 root 1.2 $self->{packlist} = \%packlist;
194     $self->{matchprefix} = $matchprefix;
195 root 1.1 }
196    
197     sub _add {
198     my ($self, $add) = @_;
199    
200     my $lib = $self->{lib};
201     my $path;
202    
203     for (@$add) {
204     $path = "$self->{libdir}/$_";
205    
206     $self->{set}{$path} ||= do {
207     my @info;
208    
209     $info[I_SRC] = $lib->{$_}
210     or croak "$_: unable to locate file in perl library";
211    
212 root 1.2 if ($self->{use_packlists} && exists $self->{packlist}{$_}) {
213     $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
214     for @{ $self->{packlist}{$_} };
215     } elsif (/^(.*)\.pm$/) {
216 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
217     $auto =~ m%/([^/]+)/$% or die;
218     my $base = $1;
219    
220     if (exists $lib->{$auto}) {
221     # auto dir exists, scan it for cool stuff
222    
223     # 1. shared object, others are of no interest to us
224     my $so = "$auto$base.$Config{dlext}";
225     if (my $src = $lib->{$so}) {
226     $so = "$self->{libdir}/$so";
227     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
228     }
229    
230     # 2. autoloader/autosplit
231     my $ix = "${auto}autosplit.ix";
232     if (my $src = $lib->{$ix}) {
233     $ix = "$self->{libdir}/$ix";
234     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
235    
236     open my $fh, "<:perlio", $src
237     or croak "$src: $!";
238    
239     my $package;
240    
241     while (<$fh>) {
242     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
243     my $al = "auto/$package/$1.al";
244     my $src = $lib->{$al}
245     or croak "$al: autoload file not found, but should be there.";
246    
247     $al = "$self->{libdir}/$al";
248     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
249    
250     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
251     ($package = $1) =~ s/::/\//g;
252     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
253     # nop
254     } else {
255     warn "WARNING: $src: unparsable line, please report: $_";
256     }
257     }
258     }
259    
260     skip:
261     }
262     }
263    
264     \@info
265     };
266     }
267     }
268    
269     sub _trace {
270     my ($self, $file, $eval) = @_;
271    
272     $self->{trace_begin} .= "#line \"$file\" 1\n$eval;\n";
273     }
274    
275     sub _trace_flush {
276     my ($self) = @_;
277    
278     return unless exists $self->{trace_begin} or exists $self->{trace_check};
279    
280     my $tmpdir = newdir File::Temp;
281     my $dir = $tmpdir->dirname;
282    
283     open my $fh, ">:perlio", "$dir/eval"
284     or croak "$dir/eval: $!";
285     syswrite $fh,
286     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
287     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
288     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
289     . "CHECK {\n"
290     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
291     . 'print join "\x00", values %INC;'
292     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
293     . "}\n"
294     . (delete $self->{trace_check});
295     close $fh;
296    
297     my $secure_perl_path = $Config{perlpath};
298    
299     if ($^O ne 'VMS') {
300     $secure_perl_path .= $Config{_exe}
301     unless $secure_perl_path =~ m/$Config{_exe}$/i;
302     }
303    
304     system $secure_perl_path, "-c", "$dir/eval"
305     and croak "trace failure, check trace process output.";
306    
307     my @inc = split /\x00/, do {
308     open my $fh, "<:perlio", "$dir/out"
309     or croak "$dir/out: $!";
310     local $/;
311     scalar readline $fh
312     };
313    
314     my $matchprefix = $self->{matchprefix};
315    
316     # remove the library directory prefix, hope for the best
317     s/$matchprefix//
318     or croak "$_: file outside any library directory"
319     for @inc;
320    
321     $self->_add (\@inc);
322     }
323    
324     =item $extractor->add_mod ($module[, $module...])
325    
326     Adds the given module(s) to the file set - the module name must be specified
327     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
328    
329     The program will be loaded with the default import list, any dependent
330     files, such as the shared object implementing xs functions, or autoload
331     files, will also be added.
332    
333     Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
334     from the distribution they are part of.
335    
336     $extractor->add_mod ("Coro", "AnyEvent::AIO");
337    
338     =cut
339    
340     sub add_mod {
341     my $self = shift;
342    
343     my $pkg = "libextractor" . ++$self->{count};
344    
345     $self->_trace ("use $_", "{ package $pkg; use $_ }")
346     for @_;
347     }
348    
349     =item $extractor->add_exe ($name[, $name...])
350    
351     Adds the given (perl) program(s) to the file set, that is, a program
352     installed by some perl module, written in perl (an example would be the
353     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
354     distribution).
355    
356     Example: add the deliantra client installed by the L<Deliantra::Client>
357     module.
358    
359     $extractor->add_exe ("deliantra");
360    
361     =cut
362    
363     sub add_exe {
364     my $self = shift;
365    
366     exe:
367     for my $exe (@_) {
368     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
369     if (open my $fh, "<:perlio", "$dir/$exe") {
370    
371     my $file = do { local $/; readline $fh };
372    
373     $self->_trace_flush if exists $self->{trace_check};
374     $self->{trace_check} = $file;
375    
376     $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
377     next exe;
378     }
379     }
380    
381     croak "add_exe ($exe): executable not found";
382     }
383     }
384    
385     =item $extractor->add_eval ($string)
386    
387     Evaluates the string and adds all modules that are loaded by it. For
388     example, this would add L<AnyEvent> and the default backend implementation
389     module and event loop module:
390    
391     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
392    
393     =cut
394    
395     sub add_eval {
396     my ($self, $eval) = @_;
397    
398     $eval =~ s/\x00/\x00."\\x00".q\x00/g;
399     $self->_trace ($eval, "eval q\x00$eval\x00 or die;\n");
400     }
401    
402     =item $extractor->add_perl
403    
404     Adds the perl binary itself to the file set, including the libperl dll, if
405     needed.
406    
407     =item $extractor->add_unicore_minimal
408    
409     =item $extractor->add_unicore_all
410    
411     =item $extractor->add_glob ($modglob[, $modglob...])
412    
413     #TODO#
414    
415     =item $extractor->filter ($pattern[, $pattern...])
416    
417     Applies a series of include/exclude filters. Each filter must start
418     with either C<+> or C<->, to designate the pattern as I<include> or
419     I<exclude> pattern. The rest of the pattern is an extended glob pattern
420     (see L<EXTENDED GLOB PATTERNS>).
421    
422     Each pattern is instantly applied, and all matching files will be
423     permanently included or excluded, that is, if you have an include pattern
424     then all files that were matched by it will be included in the set,
425     regardless of any further exclude patterns matching the same files.
426    
427     Likewise, any file excluded by a pattern will not be included in the set,
428     even if matches by later include patterns.
429    
430     Any files not matched by any expression will be included, that is, the
431     filter list has an implicit C<+/**> pattern at the end.
432    
433 root 1.2 =cut
434    
435     sub _extglob2re {
436     my $self = shift;
437    
438     for (quotemeta $_[0]) {
439     s/\\\*\\\*/.*/g;
440     s/\\\*/[^\/]*/g;
441     s/\\\?/[^\/]/g;
442    
443     unless (s%^\\/%%) {
444     s%\\:\\:%/%g;
445     $_ = (quotemeta $self->{libdir}) . "/$_\\.pm";
446     }
447    
448     $_ .= '$';
449     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
450    
451     return qr<^$_>s
452     }
453     }
454    
455     sub filter {
456     my ($self, @patterns) = @_;
457    
458     my @include;
459    
460     for my $pat (@patterns) {
461     $pat =~ s/^([+\-])//
462     or croak "$_: not a valid filter pattern (missing + or - prefix)";
463     my $inc = $1 eq "+";
464     $pat = $self->_extglob2re ($pat);
465     my @match = grep /$pat/, keys %{ $self->{set} };
466     say;
467     say $pat;
468     say join "\n", @match;
469     }
470     }
471 root 1.1
472     =item $extractor->add_auto
473    
474     #todo, not like this
475    
476     =back
477    
478     =head1 ALGORITHMS
479     #TODO
480    
481     =head2 Module/trace-based additions
482     #TODO
483    
484     For example, when using L<Coro::AnyEvent> or
485     L<AnyEvent::DNS> are added, then also all (relevant) files from the
486     L<Coro> and L<AnyEvent> distributions will be included.
487    
488     The only exception is perl itself
489    
490     =head2 Glob/path-based modifications
491     #TODO
492    
493     =head1 EXTENDED GLOB PATTERNS
494 root 1.2 #TODO
495 root 1.1
496     =head1 SEE ALSO
497    
498     The utility program that comes with this module: L<perl-libextract>.
499    
500     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
501    
502     =head1 AUTHOR
503    
504     Marc Lehmann <schmorp@schmorp.de>
505     http://home.schmorp.de/
506    
507     =cut
508    
509     1;
510