ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.3
Committed: Sat Jan 14 21:14:36 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.2: +142 -60 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set of
25 files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =over 4
33
34 =cut
35
36 package Perl::LibExtractor;
37
38 our $VERSION = '0.1';
39
40 use Config;
41 use File::Temp ();
42
43 use common::sense;
44
45 sub I_SRC () { 0 }
46 sub I_DEP () { 1 }
47
48 sub croak($) {
49 require Carp;
50 Carp::croak "(Perl::LibExtractor) $_[0]";
51 }
52
53 =item $extractor = new Perl::LibExtractor [key => value...]
54
55 Creates a new extractor object. Each extractor object stores some
56 configuration options and a subset of files that can be queried at any
57 time,.
58
59 The following key-value pairs exist, with default values as specified.
60
61 =over 4
62
63 =item exedir => "bin"
64
65 The prefix to use for the suggested target path for perl
66 executables. Defaults to F<bin>.
67
68 =item libdir => "lib"
69
70 The prefix to use for the suggested target path of perl library
71 files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
72 etc.). Defaults to F<lib>.
73
74 =item bindir => "bin"
75
76 The prefix to use for the suggested target path for (non-perl)
77 executables. Defaults to F<bin>.
78
79 =item dlldir => "bin"
80
81 The prefix to use for the suggested target path of any shared
82 libraries. Defaults to F<bin>.
83
84 =item inc => \@INC without "."
85
86 An arrayref with paths to perl library directories. The default is
87 C<\@INC>, with F<.> removed.
88
89 To prepend custom dirs just do this:
90
91 inc => ["mydir", @INC],
92
93 =item use_packlist => 1
94
95 Enable (if true) or disable the use of C<.packlist> files. If enabled,
96 then each time a module is included, the complete distribution that
97 contains it is included (and traced) as well. See L<ALGORITHMS>, below.
98
99 =back
100
101 =cut
102
103 sub new {
104 my ($class, %kv) = @_;
105
106 my $self = bless {
107 exedir => "bin",
108 libdir => "lib",
109 bindir => "bin",
110 dlldir => "bin",
111 inc => [grep $_ ne ".", @INC],
112 use_packlist => 1,
113 %kv,
114 set => {},
115 }, $class;
116
117 my %inc_seen;
118 my @inc = grep !$inc_seen{$_}++, @{ $self->{inc} };
119 $self->{inc} = \@inc;
120
121 $self->_set_inc;
122
123 $self
124 }
125
126 sub _set_inc {
127 my ($self) = @_;
128
129 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
130 $matchprefix = qr<^(?:$matchprefix)/>;
131
132 my %lib;
133 my @packlists;
134
135 # find all files in all libdirs, earlier ones overwrite later ones
136 my @scan = map [$_, ""], @{ $self->{inc} };
137
138 while (@scan) {
139 my ($root, $dir) = @{ pop @scan };
140
141 my $pfx = length $dir ? "$dir/" : "";
142
143 for (do {
144 opendir my $fh, "$root/$dir"
145 or croak "$root/$dir: $!";
146 grep !/^\.\.?$/, readdir $fh
147 }) {
148 if (-d "$root/$dir/$_/.") {
149 $lib{"$pfx$_/"} = "$root/$pfx$_";
150 push @scan, [$root, "$pfx$_"];
151 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
152 push @packlists, [$root, $pfx];
153 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
154 # skip empty .bs files
155 # } elsif (/\.(?:pod|h|html)$/) {
156 # # not interested in those
157 } else {
158 #push @files, $_;
159 $lib{"$pfx$_"} = "$root/$pfx$_";
160 }
161 }
162
163 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
164 }
165
166 my %packlist;
167
168 # need to go forward here
169 for (@packlists) {
170 my ($root, $auto) = @$_;
171
172 my @packlist;
173
174 open my $fh, "<:perlio", "$root/$auto/.packlist"
175 or die "$root/$auto/.packlist: $!";
176
177 $root = qr<^\Q$root/>;
178
179 while (<$fh>) {
180 chomp;
181 s/ .*$//; # newer-style .packlists might contain key=value pairs
182 s%/\./%/%g; # yeah, these too
183
184 s/$root// or next;
185 exists $lib{$_} or next;
186
187 push @packlist, $_;
188 $packlist{$_} = \@packlist;
189 }
190 }
191
192 $self->{lib} = \%lib;
193 $self->{packlist} = \%packlist;
194 $self->{matchprefix} = $matchprefix;
195 }
196
197 sub _add {
198 my ($self, $add) = @_;
199
200 my $lib = $self->{lib};
201 my $path;
202
203 for (@$add) {
204 $path = "$self->{libdir}/$_";
205
206 $self->{set}{$path} ||= do {
207 my @info;
208
209 $info[I_SRC] = $lib->{$_}
210 or croak "$_: unable to locate file in perl library";
211
212 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
213 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
214 for @{ $self->{packlist}{$_} };
215
216 # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
217 # s/\.pm$//;
218 # s%/%::%g;
219 # my $pkg = "libextractor" . ++$self->{count};
220 # $self->add_eval ("{ package $pkg; eval 'use $_' }")
221 # unless $self->{_add_do}{$_}++;
222 # }
223 #
224 # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
225 # for grep /\.pl$/, @{ $self->{packlist}{$_} };
226
227 } elsif (/^(.*)\.pm$/) {
228 (my $auto = "auto/$1/") =~ s%::%/%g;
229 $auto =~ m%/([^/]+)/$% or die;
230 my $base = $1;
231
232 if (exists $lib->{$auto}) {
233 # auto dir exists, scan it for cool stuff
234
235 # 1. shared object, others are of no interest to us
236 my $so = "$auto$base.$Config{dlext}";
237 if (my $src = $lib->{$so}) {
238 $so = "$self->{libdir}/$so";
239 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
240 }
241
242 # 2. autoloader/autosplit
243 my $ix = "${auto}autosplit.ix";
244 if (my $src = $lib->{$ix}) {
245 $ix = "$self->{libdir}/$ix";
246 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
247
248 open my $fh, "<:perlio", $src
249 or croak "$src: $!";
250
251 my $package;
252
253 while (<$fh>) {
254 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
255 my $al = "auto/$package/$1.al";
256 my $src = $lib->{$al}
257 or croak "$al: autoload file not found, but should be there.";
258
259 $al = "$self->{libdir}/$al";
260 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
261
262 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
263 ($package = $1) =~ s/::/\//g;
264 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
265 # nop
266 } else {
267 warn "WARNING: $src: unparsable line, please report: $_";
268 }
269 }
270 }
271
272 skip:
273 }
274 }
275
276 \@info
277 };
278 }
279 }
280
281 sub _trace {
282 my ($self, $file, $eval) = @_;
283
284 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
285 }
286
287 sub _trace_flush {
288 my ($self) = @_;
289
290 # ->_add might add additional files to trace
291 while (exists $self->{trace_begin} or exists $self->{trace_check}) {
292 my $tmpdir = newdir File::Temp;
293 my $dir = $tmpdir->dirname;
294
295 open my $fh, ">:perlio", "$dir/eval"
296 or croak "$dir/eval: $!";
297 syswrite $fh,
298 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
299 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
300 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
301 . "CHECK {\n"
302 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
303 . 'print join "\x00", values %INC;'
304 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
305 . "}\n"
306 . (delete $self->{trace_check});
307 close $fh;
308
309 my $secure_perl_path = $Config{perlpath};
310
311 if ($^O ne 'VMS') {
312 $secure_perl_path .= $Config{_exe}
313 unless $secure_perl_path =~ m/$Config{_exe}$/i;
314 }
315
316 system $secure_perl_path, "-c", "$dir/eval"
317 and croak "trace failure, check trace process output - caught";
318
319 my @inc = split /\x00/, do {
320 open my $fh, "<:perlio", "$dir/out"
321 or croak "$dir/out: $!";
322 local $/;
323 scalar readline $fh
324 };
325
326 my $matchprefix = $self->{matchprefix};
327
328 # remove the library directory prefix, hope for the best
329 s/$matchprefix//
330 or croak "$_: file outside any library directory"
331 for @inc;
332
333 $self->_add (\@inc);
334 }
335 }
336
337 =item $extractor->add_mod ($module[, $module...])
338
339 Adds the given module(s) to the file set - the module name must be specified
340 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
341
342 The program will be loaded with the default import list, any dependent
343 files, such as the shared object implementing xs functions, or autoload
344 files, will also be added.
345
346 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
347 from the distribution they are part of.
348
349 $extractor->add_mod ("Coro", "AnyEvent::AIO");
350
351 =cut
352
353 sub add_mod {
354 my $self = shift;
355
356 for (@_) {
357 my $pkg = "libextractor" . ++$self->{count};
358 $self->_trace ("use $_", "{ package $pkg; use $_ }")
359 unless $self->{add_mod}{$_}++;
360 }
361 }
362
363 =item $extractor->add_exe ($name[, $name...])
364
365 Adds the given (perl) program(s) to the file set, that is, a program
366 installed by some perl module, written in perl (an example would be the
367 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
368 distribution).
369
370 Example: add the deliantra client installed by the L<Deliantra::Client>
371 module.
372
373 $extractor->add_exe ("deliantra");
374
375 =cut
376
377 sub add_exe {
378 my $self = shift;
379
380 exe:
381 for my $exe (@_) {
382 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
383 if (open my $fh, "<:perlio", "$dir/$exe") {
384
385 my $file = do { local $/; readline $fh };
386
387 $self->_trace_flush if exists $self->{trace_check};
388 $self->{trace_check} = $file;
389
390 $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
391 next exe;
392 }
393 }
394
395 croak "add_exe ($exe): executable not found";
396 }
397 }
398
399 =item $extractor->add_eval ($string)
400
401 Evaluates the string and adds all modules that are loaded by it. For
402 example, this would add L<AnyEvent> and the default backend implementation
403 module and event loop module:
404
405 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
406
407 =cut
408
409 sub add_eval {
410 my ($self, $eval) = @_;
411
412 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
413 $self->_trace ($eval,
414 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
415 . "eval q\x00BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
416 );
417 }
418
419 =item $extractor->add_perl
420
421 Adds the perl binary itself to the file set, including the libperl dll, if
422 needed.
423
424 #TODO#
425
426 =item $extractor->add_core_support
427
428 Try to add modules and files needed to support commonly-used builtin
429 language features. For example to open a scalar for I/O you need the
430 L<PerlIO::scalar> module:
431
432 open $fh, "<", \$scalar
433
434 A number of regex and string features (e.g. C<ucfirst>) need some unicore
435 files, e.g.:
436
437 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
438
439 This call adds these files (simply by executing code similar to the above
440 code fragments).
441
442 Notable things that are missing are other PerlIO layers, such as
443 L<PerlIO::encoding>, and named character and character class matches.
444
445 =cut
446
447 sub add_core_support {
448 my ($self) = @_;
449
450 $self->add_eval ('my $v; open my $fh, "<", \$v');
451 $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
452 }
453
454 =item $extractor->add_unicore
455
456 #TODO
457
458 =item $extractor->add_glob ($modglob[, $modglob...])
459
460 #TODO#
461
462 =item $extractor->filter ($pattern[, $pattern...])
463
464 Applies a series of include/exclude filters. Each filter must start
465 with either C<+> or C<->, to designate the pattern as I<include> or
466 I<exclude> pattern. The rest of the pattern is an extended glob pattern
467 (see L<EXTENDED GLOB PATTERNS>).
468
469 Each pattern is instantly applied, and all matching files will be
470 permanently included or excluded, that is, if you have an include pattern
471 then all files that were matched by it will be included in the set,
472 regardless of any further exclude patterns matching the same files.
473
474 Likewise, any file excluded by a pattern will not be included in the set,
475 even if matches by later include patterns.
476
477 Any files not matched by any expression will be included, that is, the
478 filter list has an implicit C<+/**> pattern at the end.
479
480 =cut
481
482 sub _extglob2re {
483 my $self = shift;
484
485 for (quotemeta $_[0]) {
486 s/\\\*\\\*/.*/g;
487 s/\\\*/[^\/]*/g;
488 s/\\\?/[^\/]/g;
489
490 unless (s%^\\/%%) {
491 s%\\:\\:%/%g;
492 $_ = (quotemeta $self->{libdir}) . "/$_\\.pm";
493 }
494
495 $_ .= '$';
496 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
497
498 return qr<^$_>s
499 }
500 }
501
502 sub filter {
503 my ($self, @patterns) = @_;
504
505 $self->_trace_flush;
506
507 my $set = $self->{set};
508 my %include;
509
510 for my $pat (@patterns) {
511 $pat =~ s/^([+\-])//
512 or croak "$_: not a valid filter pattern (missing + or - prefix)";
513 my $inc = $1 eq "+";
514 $pat = $self->_extglob2re ($pat);
515
516 my @match = grep /$pat/, keys %$set;
517
518 if ($inc) {
519 @include{@match} = delete @$set{@match};
520 } else {
521 delete @$set{@{ $_->[I_DEP] }} # remove dependents
522 for delete @$set{@match};
523 }
524 }
525
526 my @include = keys %include;
527 @$set{@include} = delete @include{@include};
528 }
529
530 =item $extractor->runtime_only
531
532 This removes all files that are not needed at runtime, such as static
533 archives, header and other files needed only for compilation of modules,
534 and pod and html files (which are unlikely to be needed at runtime).
535
536 This is quite useful when you want to have only fiels actually needed to
537 execute a program.
538
539 =cut
540
541 sub runtime_only {
542 my ($self) = @_;
543
544 $self->_trace_flush;
545
546 my $set = $self->{set};
547
548 # delete all static libraries
549 delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set };
550
551 # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
552 delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
553
554 # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
555 delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set };
556 }
557
558 =back
559
560 =head1 ALGORITHMS
561 #TODO
562
563 =head2 Module/trace-based additions
564 #TODO
565
566 For example, when using L<Coro::AnyEvent> or
567 L<AnyEvent::DNS> are added, then also all (relevant) files from the
568 L<Coro> and L<AnyEvent> distributions will be included.
569
570 The only exception is perl itself
571
572 =head2 Glob/path-based modifications
573 #TODO
574
575 =head1 EXTENDED GLOB PATTERNS
576 #TODO
577
578 =head1 SEE ALSO
579
580 The utility program that comes with this module: L<perl-libextract>.
581
582 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
583
584 =head1 AUTHOR
585
586 Marc Lehmann <schmorp@schmorp.de>
587 http://home.schmorp.de/
588
589 =cut
590
591 1;
592