ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.2
Committed: Sat Jan 14 18:42:53 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.1: +108 -66 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set of
25 files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =over 4
33
34 =cut
35
36 package Perl::LibExtractor;
37
38 our $VERSION = '0.1';
39
40 use Config;
41 use File::Temp ();
42
43 use common::sense;
44
45 sub I_SRC () { 0 }
46 sub I_DEP () { 1 }
47
48 sub croak($) {
49 require Carp;
50 Carp::croak "(Perl::LibExtractor) $_[0]";
51 }
52
53 =item $extractor = new Perl::LibExtractor [key => value...]
54
55 Creates a new extractor object. Each extractor object stores some
56 configuration options and a subset of files that can be queried at any
57 time,.
58
59 The following key-value pairs exist, with default values as specified.
60
61 =over 4
62
63 =item exedir => "bin"
64
65 The prefix to use for the suggested target path for perl
66 executables. Defaults to F<bin>.
67
68 =item libdir => "lib"
69
70 The prefix to use for the suggested target path of perl library
71 files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
72 etc.). Defaults to F<lib>.
73
74 =item bindir => "bin"
75
76 The prefix to use for the suggested target path for (non-perl)
77 executables. Defaults to F<bin>.
78
79 =item dlldir => "bin"
80
81 The prefix to use for the suggested target path of any shared
82 libraries. Defaults to F<bin>.
83
84 =item inc => \@INC without "."
85
86 An arrayref with paths to perl library directories. The default is
87 C<\@INC>, with F<.> removed.
88
89 To prepend custom dirs just do this:
90
91 inc => ["mydir", @INC],
92
93 =item use_packlists => 1
94
95 Enable (if true) or disable the use of C<.packlists>. If enabled, then
96 each time a module is included, the complete distribution that contains
97 it is included as well. See L<ALGORITHMS>, below.
98
99 =back
100
101 =cut
102
103 sub new {
104 my ($class, %kv) = @_;
105
106 my $self = bless {
107 exedir => "bin",
108 libdir => "lib",
109 bindir => "bin",
110 dlldir => "bin",
111 inc => [grep $_ ne ".", @INC],
112 use_packlists => 1,
113 %kv,
114 set => {},
115 }, $class;
116
117 my %inc_seen;
118 my @inc = grep !$inc_seen{$_}++, @{ $self->{inc} };
119 $self->{inc} = \@inc;
120
121 $self->_set_inc;
122
123 $self
124 }
125
126 sub _set_inc {
127 my ($self) = @_;
128
129 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
130 $matchprefix = qr<^(?:$matchprefix)/>;
131
132 my %lib;
133 my @packlists;
134
135 # find all files in all libdirs, earlier ones overwrite later ones
136 my @scan = map [$_, ""], @{ $self->{inc} };
137
138 while (@scan) {
139 my ($root, $dir) = @{ pop @scan };
140
141 my $pfx = length $dir ? "$dir/" : "";
142
143 for (do {
144 opendir my $fh, "$root/$dir"
145 or croak "$root/$dir: $!";
146 grep !/^\.\.?$/, readdir $fh
147 }) {
148 if (-d "$root/$dir/$_/.") {
149 $lib{"$pfx$_/"} = "$root/$pfx$_";
150 push @scan, [$root, "$pfx$_"];
151 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
152 push @packlists, [$root, $pfx];
153 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
154 # skip empty .bs files
155 # } elsif (/\.(?:pod|h|html)$/) {
156 # # not interested in those
157 } else {
158 #push @files, $_;
159 $lib{"$pfx$_"} = "$root/$pfx$_";
160 }
161 }
162
163 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
164 }
165
166 my %packlist;
167
168 # need to go forward here
169 for (@packlists) {
170 my ($root, $auto) = @$_;
171
172 my @packlist;
173
174 open my $fh, "<:perlio", "$root/$auto/.packlist"
175 or die "$root/$auto/.packlist: $!";
176
177 $root = qr<^\Q$root/>;
178
179 while (<$fh>) {
180 chomp;
181 s/ .*$//; # newer-style .packlists might contain key=value pairs
182 s%/\./%/%g; # yeah, these too
183
184 s/$root// or next;
185 exists $lib{$_} or next;
186
187 push @packlist, $_;
188 $packlist{$_} = \@packlist;
189 }
190 }
191
192 $self->{lib} = \%lib;
193 $self->{packlist} = \%packlist;
194 $self->{matchprefix} = $matchprefix;
195 }
196
197 sub _add {
198 my ($self, $add) = @_;
199
200 my $lib = $self->{lib};
201 my $path;
202
203 for (@$add) {
204 $path = "$self->{libdir}/$_";
205
206 $self->{set}{$path} ||= do {
207 my @info;
208
209 $info[I_SRC] = $lib->{$_}
210 or croak "$_: unable to locate file in perl library";
211
212 if ($self->{use_packlists} && exists $self->{packlist}{$_}) {
213 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
214 for @{ $self->{packlist}{$_} };
215 } elsif (/^(.*)\.pm$/) {
216 (my $auto = "auto/$1/") =~ s%::%/%g;
217 $auto =~ m%/([^/]+)/$% or die;
218 my $base = $1;
219
220 if (exists $lib->{$auto}) {
221 # auto dir exists, scan it for cool stuff
222
223 # 1. shared object, others are of no interest to us
224 my $so = "$auto$base.$Config{dlext}";
225 if (my $src = $lib->{$so}) {
226 $so = "$self->{libdir}/$so";
227 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
228 }
229
230 # 2. autoloader/autosplit
231 my $ix = "${auto}autosplit.ix";
232 if (my $src = $lib->{$ix}) {
233 $ix = "$self->{libdir}/$ix";
234 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
235
236 open my $fh, "<:perlio", $src
237 or croak "$src: $!";
238
239 my $package;
240
241 while (<$fh>) {
242 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
243 my $al = "auto/$package/$1.al";
244 my $src = $lib->{$al}
245 or croak "$al: autoload file not found, but should be there.";
246
247 $al = "$self->{libdir}/$al";
248 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
249
250 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
251 ($package = $1) =~ s/::/\//g;
252 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
253 # nop
254 } else {
255 warn "WARNING: $src: unparsable line, please report: $_";
256 }
257 }
258 }
259
260 skip:
261 }
262 }
263
264 \@info
265 };
266 }
267 }
268
269 sub _trace {
270 my ($self, $file, $eval) = @_;
271
272 $self->{trace_begin} .= "#line \"$file\" 1\n$eval;\n";
273 }
274
275 sub _trace_flush {
276 my ($self) = @_;
277
278 return unless exists $self->{trace_begin} or exists $self->{trace_check};
279
280 my $tmpdir = newdir File::Temp;
281 my $dir = $tmpdir->dirname;
282
283 open my $fh, ">:perlio", "$dir/eval"
284 or croak "$dir/eval: $!";
285 syswrite $fh,
286 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
287 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
288 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
289 . "CHECK {\n"
290 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
291 . 'print join "\x00", values %INC;'
292 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
293 . "}\n"
294 . (delete $self->{trace_check});
295 close $fh;
296
297 my $secure_perl_path = $Config{perlpath};
298
299 if ($^O ne 'VMS') {
300 $secure_perl_path .= $Config{_exe}
301 unless $secure_perl_path =~ m/$Config{_exe}$/i;
302 }
303
304 system $secure_perl_path, "-c", "$dir/eval"
305 and croak "trace failure, check trace process output.";
306
307 my @inc = split /\x00/, do {
308 open my $fh, "<:perlio", "$dir/out"
309 or croak "$dir/out: $!";
310 local $/;
311 scalar readline $fh
312 };
313
314 my $matchprefix = $self->{matchprefix};
315
316 # remove the library directory prefix, hope for the best
317 s/$matchprefix//
318 or croak "$_: file outside any library directory"
319 for @inc;
320
321 $self->_add (\@inc);
322 }
323
324 =item $extractor->add_mod ($module[, $module...])
325
326 Adds the given module(s) to the file set - the module name must be specified
327 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
328
329 The program will be loaded with the default import list, any dependent
330 files, such as the shared object implementing xs functions, or autoload
331 files, will also be added.
332
333 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
334 from the distribution they are part of.
335
336 $extractor->add_mod ("Coro", "AnyEvent::AIO");
337
338 =cut
339
340 sub add_mod {
341 my $self = shift;
342
343 my $pkg = "libextractor" . ++$self->{count};
344
345 $self->_trace ("use $_", "{ package $pkg; use $_ }")
346 for @_;
347 }
348
349 =item $extractor->add_exe ($name[, $name...])
350
351 Adds the given (perl) program(s) to the file set, that is, a program
352 installed by some perl module, written in perl (an example would be the
353 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
354 distribution).
355
356 Example: add the deliantra client installed by the L<Deliantra::Client>
357 module.
358
359 $extractor->add_exe ("deliantra");
360
361 =cut
362
363 sub add_exe {
364 my $self = shift;
365
366 exe:
367 for my $exe (@_) {
368 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
369 if (open my $fh, "<:perlio", "$dir/$exe") {
370
371 my $file = do { local $/; readline $fh };
372
373 $self->_trace_flush if exists $self->{trace_check};
374 $self->{trace_check} = $file;
375
376 $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
377 next exe;
378 }
379 }
380
381 croak "add_exe ($exe): executable not found";
382 }
383 }
384
385 =item $extractor->add_eval ($string)
386
387 Evaluates the string and adds all modules that are loaded by it. For
388 example, this would add L<AnyEvent> and the default backend implementation
389 module and event loop module:
390
391 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
392
393 =cut
394
395 sub add_eval {
396 my ($self, $eval) = @_;
397
398 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
399 $self->_trace ($eval, "eval q\x00$eval\x00 or die;\n");
400 }
401
402 =item $extractor->add_perl
403
404 Adds the perl binary itself to the file set, including the libperl dll, if
405 needed.
406
407 =item $extractor->add_unicore_minimal
408
409 =item $extractor->add_unicore_all
410
411 =item $extractor->add_glob ($modglob[, $modglob...])
412
413 #TODO#
414
415 =item $extractor->filter ($pattern[, $pattern...])
416
417 Applies a series of include/exclude filters. Each filter must start
418 with either C<+> or C<->, to designate the pattern as I<include> or
419 I<exclude> pattern. The rest of the pattern is an extended glob pattern
420 (see L<EXTENDED GLOB PATTERNS>).
421
422 Each pattern is instantly applied, and all matching files will be
423 permanently included or excluded, that is, if you have an include pattern
424 then all files that were matched by it will be included in the set,
425 regardless of any further exclude patterns matching the same files.
426
427 Likewise, any file excluded by a pattern will not be included in the set,
428 even if matches by later include patterns.
429
430 Any files not matched by any expression will be included, that is, the
431 filter list has an implicit C<+/**> pattern at the end.
432
433 =cut
434
435 sub _extglob2re {
436 my $self = shift;
437
438 for (quotemeta $_[0]) {
439 s/\\\*\\\*/.*/g;
440 s/\\\*/[^\/]*/g;
441 s/\\\?/[^\/]/g;
442
443 unless (s%^\\/%%) {
444 s%\\:\\:%/%g;
445 $_ = (quotemeta $self->{libdir}) . "/$_\\.pm";
446 }
447
448 $_ .= '$';
449 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
450
451 return qr<^$_>s
452 }
453 }
454
455 sub filter {
456 my ($self, @patterns) = @_;
457
458 my @include;
459
460 for my $pat (@patterns) {
461 $pat =~ s/^([+\-])//
462 or croak "$_: not a valid filter pattern (missing + or - prefix)";
463 my $inc = $1 eq "+";
464 $pat = $self->_extglob2re ($pat);
465 my @match = grep /$pat/, keys %{ $self->{set} };
466 say;
467 say $pat;
468 say join "\n", @match;
469 }
470 }
471
472 =item $extractor->add_auto
473
474 #todo, not like this
475
476 =back
477
478 =head1 ALGORITHMS
479 #TODO
480
481 =head2 Module/trace-based additions
482 #TODO
483
484 For example, when using L<Coro::AnyEvent> or
485 L<AnyEvent::DNS> are added, then also all (relevant) files from the
486 L<Coro> and L<AnyEvent> distributions will be included.
487
488 The only exception is perl itself
489
490 =head2 Glob/path-based modifications
491 #TODO
492
493 =head1 EXTENDED GLOB PATTERNS
494 #TODO
495
496 =head1 SEE ALSO
497
498 The utility program that comes with this module: L<perl-libextract>.
499
500 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
501
502 =head1 AUTHOR
503
504 Marc Lehmann <schmorp@schmorp.de>
505 http://home.schmorp.de/
506
507 =cut
508
509 1;
510