ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.13
Committed: Fri Jan 20 20:07:59 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.12: +5 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72 root 1.12 Binary executables (such as the perl interpreter) are stored inside
73     F<bin/>, perl scripts are stored under F<script/>, perl library files are
74     stored under F<lib/> and shared libraries are stored under F<dll/>.
75    
76 root 1.1 The following key-value pairs exist, with default values as specified.
77    
78     =over 4
79    
80     =item inc => \@INC without "."
81    
82     An arrayref with paths to perl library directories. The default is
83     C<\@INC>, with F<.> removed.
84    
85     To prepend custom dirs just do this:
86    
87     inc => ["mydir", @INC],
88    
89 root 1.3 =item use_packlist => 1
90 root 1.1
91 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 root 1.5 then each time a file is traced, the complete distribution that contains
93     it is included (but not traced).
94    
95     If disabled, only shared objects and autoload files will be added.
96 root 1.1
97 root 1.13 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98     this option will fail. Other perls should be fine.
99    
100 root 1.1 =back
101    
102     =cut
103    
104     sub new {
105     my ($class, %kv) = @_;
106    
107     my $self = bless {
108 root 1.12 inc => [grep $_ ne ".", @INC],
109 root 1.3 use_packlist => 1,
110 root 1.1 %kv,
111     set => {},
112     }, $class;
113    
114     my %inc_seen;
115 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
116 root 1.1 $self->{inc} = \@inc;
117    
118     $self->_set_inc;
119    
120     $self
121     }
122    
123 root 1.4 sub _perl_path() {
124     my $secure_perl_path = $Config{perlpath};
125    
126     if ($^O ne 'VMS') {
127     $secure_perl_path .= $Config{_exe}
128     unless $secure_perl_path =~ m/$Config{_exe}$/i;
129     }
130    
131     $secure_perl_path
132     }
133    
134 root 1.8 sub _path2match {
135     my $re = join "|", map "\Q$_", @_;
136    
137     $re = "(?:$re)\\/";
138     $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
139    
140     $case_tolerant
141     ? qr<$re>i
142     : qr<$re>
143     }
144    
145 root 1.11 sub _read_packlist {
146     my ($self, $root, $path) = @_;
147    
148     my $lib = $self->{lib};
149    
150     my @packlist;
151    
152     open my $fh, "<:perlio", "$root/$path"
153     or die "$root/$path: $!";
154    
155     $root = _path2match $root;
156    
157     while (<$fh>) {
158     chomp;
159     s/ .*$//; # newer-style .packlists might contain key=value pairs
160    
161     s/$root// and exists $lib->{$_}
162     or next;
163    
164     push @packlist, canonpath $_;
165     }
166    
167     \@packlist
168     }
169    
170 root 1.1 sub _set_inc {
171     my ($self) = @_;
172    
173 root 1.8 my $matchprefix = _path2match @{ $self->{inc }};
174 root 1.2
175 root 1.1 my %lib;
176 root 1.2 my @packlists;
177 root 1.1
178     # find all files in all libdirs, earlier ones overwrite later ones
179 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
180    
181     while (@scan) {
182     my ($root, $dir) = @{ pop @scan };
183    
184     my $pfx = length $dir ? "$dir/" : "";
185    
186     for (do {
187     opendir my $fh, "$root/$dir"
188     or croak "$root/$dir: $!";
189     grep !/^\.\.?$/, readdir $fh
190     }) {
191     if (-d "$root/$dir/$_/.") {
192     $lib{"$pfx$_/"} = "$root/$pfx$_";
193     push @scan, [$root, "$pfx$_"];
194     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
195     push @packlists, [$root, $pfx];
196     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
197     # skip empty .bs files
198     # } elsif (/\.(?:pod|h|html)$/) {
199     # # not interested in those
200     } else {
201     #push @files, $_;
202     $lib{"$pfx$_"} = "$root/$pfx$_";
203 root 1.1 }
204 root 1.2 }
205    
206     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
207     }
208    
209     my %packlist;
210 root 1.1
211 root 1.2 # need to go forward here
212     for (@packlists) {
213 root 1.11 my $packlist = $self->_read_packlist ($_->[0], "$_->[1]/.packlist");
214 root 1.2
215 root 1.11 $packlist{$_} = $packlist
216     for @$packlist;
217 root 1.1 }
218    
219 root 1.8 $self->{lib} = \%lib;
220     $self->{packlist} = \%packlist;
221 root 1.2 $self->{matchprefix} = $matchprefix;
222 root 1.1 }
223    
224 root 1.5 =back
225    
226     =head2 TRACE/PACKLIST BASED ADDING
227    
228     The following methods add various things to the set of files.
229    
230     Each time a perl file is added, it is scanned by tracing either loading,
231     execution or compiling it, and seeing which other perl modules and
232     libraries have been loaded.
233    
234     For each library file found this way, additional dependencies are added:
235     if packlists are enabled, then all files of the distribution that contains
236     the file will be added. If packlists are disabled, then only shared
237     objects and autoload files for modules will be added.
238    
239     Only files from perl library directories will be added automatically. Any
240     other files (such as manpages or scripts installed in the F<bin>
241     directory) are skipped.
242    
243     If there is an error, such as a module not being found, then this module
244     croaks (as opposed to silently skipping). If you want to add something of
245     which you are not sure it exists, then you can wrap the call into C<eval
246     {}>. In some cases, you can avoid this by executing the code you want
247     to work later using C<add_eval> - see C<add_core_support> for an actual
248     example of this technique.
249    
250     Note that packlists are meant to add files not covered by other
251     mechanisms, such as resource files and other data files loaded directly by
252     a module - they are not meant to add dependencies that are missed because
253     they only happen at runtime.
254    
255     For example, with packlists, when using L<AnyEvent>, then all event loop
256     backends are automatically added as well, but I<not> any event loops
257     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
258     packlists, only the backend that is being used is added (i.e. normally
259     none, as loading AnyEvent does not instantly load any backend).
260    
261     To catch the extra event loop dependencies, you can either initialise
262     AnyEvent so it picks a suitable backend:
263    
264     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
265    
266     Or you can directly load the backend modules you plan to use:
267    
268     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
269    
270     An example of a program (or module) that has extra resource files is
271     L<Deliantra::Client> - the normal tracing (without packlist usage) will
272     correctly add all submodules, but miss the fonts and textures. By using
273     the packlist, those files are added correctly.
274    
275     =over 4
276    
277     =cut
278    
279 root 1.1 sub _add {
280     my ($self, $add) = @_;
281    
282     my $lib = $self->{lib};
283     my $path;
284    
285     for (@$add) {
286 root 1.12 $path = "lib/$_";
287 root 1.1
288     $self->{set}{$path} ||= do {
289     my @info;
290    
291     $info[I_SRC] = $lib->{$_}
292     or croak "$_: unable to locate file in perl library";
293    
294 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
295 root 1.12 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
296 root 1.2 for @{ $self->{packlist}{$_} };
297 root 1.3
298     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
299     # s/\.pm$//;
300     # s%/%::%g;
301     # my $pkg = "libextractor" . ++$self->{count};
302     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
303     # unless $self->{_add_do}{$_}++;
304     # }
305     #
306     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
307     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
308    
309 root 1.2 } elsif (/^(.*)\.pm$/) {
310 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
311     $auto =~ m%/([^/]+)/$% or die;
312     my $base = $1;
313    
314     if (exists $lib->{$auto}) {
315     # auto dir exists, scan it for cool stuff
316    
317     # 1. shared object, others are of no interest to us
318     my $so = "$auto$base.$Config{dlext}";
319     if (my $src = $lib->{$so}) {
320 root 1.12 $so = "lib/$so";
321 root 1.1 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
322     }
323    
324     # 2. autoloader/autosplit
325     my $ix = "${auto}autosplit.ix";
326     if (my $src = $lib->{$ix}) {
327 root 1.12 $ix = "lib/$ix";
328 root 1.1 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
329    
330     open my $fh, "<:perlio", $src
331     or croak "$src: $!";
332    
333     my $package;
334    
335     while (<$fh>) {
336     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
337     my $al = "auto/$package/$1.al";
338     my $src = $lib->{$al}
339     or croak "$al: autoload file not found, but should be there.";
340    
341 root 1.12 $al = "lib/$al";
342 root 1.1 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
343    
344     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
345     ($package = $1) =~ s/::/\//g;
346     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
347     # nop
348     } else {
349     warn "WARNING: $src: unparsable line, please report: $_";
350     }
351     }
352     }
353    
354     skip:
355     }
356     }
357    
358     \@info
359     };
360     }
361     }
362    
363     sub _trace {
364     my ($self, $file, $eval) = @_;
365    
366 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
367 root 1.1 }
368    
369     sub _trace_flush {
370     my ($self) = @_;
371    
372 root 1.3 # ->_add might add additional files to trace
373     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
374     my $tmpdir = newdir File::Temp;
375     my $dir = $tmpdir->dirname;
376    
377     open my $fh, ">:perlio", "$dir/eval"
378     or croak "$dir/eval: $!";
379     syswrite $fh,
380     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
381     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
382     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
383     . "CHECK {\n"
384     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
385     . 'print join "\x00", values %INC;'
386     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
387     . "}\n"
388     . (delete $self->{trace_check});
389     close $fh;
390    
391 root 1.4 system _perl_path, "-c", "$dir/eval"
392 root 1.3 and croak "trace failure, check trace process output - caught";
393 root 1.1
394 root 1.3 my @inc = split /\x00/, do {
395     open my $fh, "<:perlio", "$dir/out"
396     or croak "$dir/out: $!";
397     local $/;
398     scalar readline $fh
399     };
400 root 1.1
401 root 1.3 my $matchprefix = $self->{matchprefix};
402 root 1.1
403 root 1.3 # remove the library directory prefix, hope for the best
404     s/$matchprefix//
405     or croak "$_: file outside any library directory"
406     for @inc;
407 root 1.1
408 root 1.3 $self->_add (\@inc);
409     }
410 root 1.1 }
411    
412     =item $extractor->add_mod ($module[, $module...])
413    
414     Adds the given module(s) to the file set - the module name must be specified
415     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
416    
417     The program will be loaded with the default import list, any dependent
418     files, such as the shared object implementing xs functions, or autoload
419     files, will also be added.
420    
421 root 1.5 If you want to use a different import list (for those rare modules wghere
422     import lists trigger different backend modules to be loaded for example),
423     you can use C<add_eval> instead:
424    
425     $extractor->add_eval ("use Module qw(a b c)");
426    
427 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
428     from the distribution they are part of.
429    
430     $extractor->add_mod ("Coro", "AnyEvent::AIO");
431    
432     =cut
433    
434     sub add_mod {
435     my $self = shift;
436    
437 root 1.3 for (@_) {
438     my $pkg = "libextractor" . ++$self->{count};
439     $self->_trace ("use $_", "{ package $pkg; use $_ }")
440     unless $self->{add_mod}{$_}++;
441     }
442 root 1.1 }
443    
444 root 1.12 =item $extractor->add_bin ($name[, $name...])
445 root 1.1
446     Adds the given (perl) program(s) to the file set, that is, a program
447     installed by some perl module, written in perl (an example would be the
448     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
449     distribution).
450    
451 root 1.5 Example: add the deliantra client program installed by the
452 root 1.12 L<Deliantra::Client> module and put it under F<bin/deliantra>.
453 root 1.1
454 root 1.12 $extractor->add_bin ("deliantra");
455 root 1.1
456     =cut
457    
458 root 1.12 sub add_bin {
459 root 1.1 my $self = shift;
460    
461     exe:
462     for my $exe (@_) {
463     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
464     if (open my $fh, "<:perlio", "$dir/$exe") {
465 root 1.12 if (-f $fh) {
466     my $file = do { local $/; readline $fh };
467 root 1.1
468 root 1.12 $self->_trace_flush if exists $self->{trace_check};
469     $self->{trace_check} = $file;
470 root 1.1
471 root 1.12 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
472     next exe;
473     }
474 root 1.1 }
475     }
476    
477 root 1.12 croak "add_bin ($exe): executable not found";
478 root 1.1 }
479     }
480    
481     =item $extractor->add_eval ($string)
482    
483 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
484     by it. For example, this would add L<AnyEvent> and the default backend
485     implementation module and event loop module:
486 root 1.1
487     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
488    
489 root 1.5 Each code snippet will be executed in its own package and under C<use
490     strict>.
491    
492 root 1.1 =cut
493    
494     sub add_eval {
495     my ($self, $eval) = @_;
496    
497 root 1.5 my $pkg = "libextractor" . ++$self->{count};
498 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
499 root 1.3 $self->_trace ($eval,
500     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
501 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
502 root 1.3 );
503 root 1.1 }
504    
505 root 1.5 =back
506    
507     =head2 OTHER METHODS FOR ADDING FILES
508    
509     The following methods add commonly used files that are either not covered
510     by other methods or add commonly-used dependencies.
511    
512     =over 4
513    
514 root 1.1 =item $extractor->add_perl
515    
516     Adds the perl binary itself to the file set, including the libperl dll, if
517     needed.
518    
519 root 1.12 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
520     some F<dll/libperl.so.XXX>.
521 root 1.5
522 root 1.4 =cut
523    
524     sub add_perl {
525     my ($self) = @_;
526    
527 root 1.12 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
528 root 1.4
529     # on debian, we have the special case of a perl binary linked against
530     # a static libperl.a (which is not available), but the Config says to use
531     # a shared library, which is in the wrong directory, too (which breaks
532     # every other perl installation on the system - they are so stupid).
533    
534     # that means we can't find the libperl.so, because dbeian actively breaks
535     # their perl install, and we don't need it. we work around this by silently
536     # not including the libperl if we cannot find it.
537    
538     if ($Config{useshrplib} eq "true") {
539 root 1.8 my ($libperl, $libpath);
540    
541     if ($^O eq "cygwin") {
542     $libperl = $Config{libperl};
543     $libpath = "$Config{binexp}/$libperl";
544     } elsif ($^O eq "MSWin32") {
545     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
546     $libpath = "$Config{binexp}/$libperl";
547     } else {
548     $libperl = $Config{libperl};
549     $libpath = $self->{lib}{"CORE/$libperl"};
550 root 1.4 }
551 root 1.8
552 root 1.12 $self->{set}{"dll/$libperl"} = $libpath
553 root 1.8 if length $libpath && -e $libpath;
554 root 1.4 }
555     }
556 root 1.3
557     =item $extractor->add_core_support
558    
559     Try to add modules and files needed to support commonly-used builtin
560     language features. For example to open a scalar for I/O you need the
561     L<PerlIO::scalar> module:
562    
563     open $fh, "<", \$scalar
564    
565     A number of regex and string features (e.g. C<ucfirst>) need some unicore
566     files, e.g.:
567    
568     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
569    
570     This call adds these files (simply by executing code similar to the above
571     code fragments).
572    
573     Notable things that are missing are other PerlIO layers, such as
574     L<PerlIO::encoding>, and named character and character class matches.
575    
576     =cut
577    
578     sub add_core_support {
579     my ($self) = @_;
580    
581     $self->add_eval ('my $v; open my $fh, "<", \$v');
582     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
583 root 1.10 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
584     $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
585 root 1.3 }
586 root 1.1
587 root 1.3 =item $extractor->add_unicore
588    
589 root 1.11 Adds (hopefully) all files from the unicore database that will ever be
590 root 1.5 needed.
591    
592     If you are not sure which unicode character classes and similar unicore
593     databases you need, and you do not care about an extra one thousand(!)
594     files comprising 4MB of data, then you can just call this method, which
595     adds basically all files from perl's unicode database.
596    
597 root 1.11 Note that C<add_core_support> also adds some unicore files, but it's not a
598     subset of C<add_unicore> - the former adds all files neccessary to support
599     core builtins (which includes some unicore files and other things), while
600     the latter adds all unicore files (but nothing else).
601    
602     When in doubt, use both.
603    
604 root 1.5 =cut
605    
606     sub add_unicore {
607     my ($self) = @_;
608    
609     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
610     }
611    
612 root 1.11 =item $extractor->add_core
613    
614     This adds all files from the perl core distribution, that is, all library
615     files that come with perl.
616    
617     This is a superset of C<add_core_support> and C<add_unicore>.
618    
619     This is quite a lot, but on the plus side, you can be sure nothing is
620     missing.
621    
622 root 1.13 This requires a full perl installation - Debian GNU/Linux doesn't package
623     the full perl library, so this function will not work there.
624 root 1.11
625     =cut
626    
627     sub add_core {
628     my ($self) = @_;
629    
630     my $lib = $self->{lib};
631    
632     for (@{
633     $self->_read_packlist ($Config{privlibexp}, ".packlist")
634     }) {
635     $self->{set}{$_} ||= [
636 root 1.12 "lib/"
637 root 1.11 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
638     ];
639     }
640     }
641    
642 root 1.5 =back
643    
644     =head2 GLOB-BASED ADDING AND FILTERING
645 root 1.1
646 root 1.5 These methods add or manipulate files by using glob-based patterns.
647    
648     These glob patterns work similarly to glob patterns in the shell:
649    
650     =over 4
651    
652     =item /
653    
654     A F</> at the start of the pattern interprets the pattern as a file
655     path inside the file set, almost the same as in the shell. For example,
656     F</bin/perl*> would match all files whose names starting with F<perl>
657     inside the F<bin> directory in the set.
658    
659     If the F</> is missing, then the pattern is interpreted as a module name
660 root 1.12 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
661     while F<Coro::*> would match F<lib/Coro/*.pm>.
662 root 1.5
663     =item *
664    
665     A single star matches anything inside a single directory component. For
666     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
667     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
668 root 1.1
669 root 1.5 Another way to look at it is that a single star matches anything but a
670     slash (F</>).
671 root 1.1
672 root 1.5 =item **
673 root 1.1
674 root 1.5 A double star matches any number of characters in the path, including F</>.
675 root 1.1
676 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
677     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
678 root 1.1
679 root 1.5 =back
680 root 1.1
681 root 1.2 =cut
682    
683     sub _extglob2re {
684 root 1.5 for (quotemeta $_[1]) {
685 root 1.2 s/\\\*\\\*/.*/g;
686     s/\\\*/[^\/]*/g;
687     s/\\\?/[^\/]/g;
688    
689     unless (s%^\\/%%) {
690     s%\\:\\:%/%g;
691 root 1.12 $_ = "lib/$_\\.pm";
692 root 1.2 }
693    
694     $_ .= '$';
695     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
696    
697     return qr<^$_>s
698     }
699     }
700    
701 root 1.5 =over 4
702    
703     =item $extractor->add_glob ($modglob[, $modglob...])
704    
705     Adds all files from the perl library that match the given glob pattern.
706    
707     For example, you could implement C<add_unicore> yourself like this:
708    
709     $extractor->add_glob ("/unicore/**.pl");
710    
711     =cut
712    
713     sub add_glob {
714     my $self = shift;
715    
716     for (@_) {
717     my $pat = $self->_extglob2re ($_);
718     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
719     }
720     }
721    
722     =item $extractor->filter ($pattern[, $pattern...])
723    
724     Applies a series of include/exclude filters. Each filter must start with
725     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
726     pattern. The rest of the pattern is a normal glob pattern.
727    
728     An exclude pattern (C<->) instantly removes all matching files from
729     the set. An include pattern (C<+>) protects matching files from later
730     removals.
731    
732     That is, if you have an include pattern then all files that were matched
733     by it will be included in the set, regardless of any further exclude
734     patterns matching the same files.
735    
736     Likewise, any file excluded by a pattern will not be included in the set,
737     even if matched by later include patterns.
738    
739     Any files not matched by any expression will simply stay in the set.
740    
741     For example, to remove most of the useless autoload functions by the POSIX
742     module (they either do the same thing as a builtin or always raise an
743 root 1.12 error), you would use this:
744 root 1.5
745     $extractor->filter ("-/lib/auto/POSIX/*.al");
746    
747     This does not remove all autoload files, only the ones not defined by a
748     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
749    
750     =cut
751    
752 root 1.2 sub filter {
753     my ($self, @patterns) = @_;
754    
755 root 1.3 $self->_trace_flush;
756    
757     my $set = $self->{set};
758     my %include;
759 root 1.2
760     for my $pat (@patterns) {
761     $pat =~ s/^([+\-])//
762     or croak "$_: not a valid filter pattern (missing + or - prefix)";
763     my $inc = $1 eq "+";
764     $pat = $self->_extglob2re ($pat);
765 root 1.3
766     my @match = grep /$pat/, keys %$set;
767    
768     if ($inc) {
769     @include{@match} = delete @$set{@match};
770     } else {
771     delete @$set{@{ $_->[I_DEP] }} # remove dependents
772     for delete @$set{@match};
773     }
774 root 1.2 }
775 root 1.3
776     my @include = keys %include;
777     @$set{@include} = delete @include{@include};
778 root 1.2 }
779 root 1.1
780 root 1.3 =item $extractor->runtime_only
781    
782     This removes all files that are not needed at runtime, such as static
783     archives, header and other files needed only for compilation of modules,
784     and pod and html files (which are unlikely to be needed at runtime).
785 root 1.1
786 root 1.8 This is quite useful when you want to have only files actually needed to
787 root 1.3 execute a program.
788    
789     =cut
790    
791     sub runtime_only {
792     my ($self) = @_;
793    
794     $self->_trace_flush;
795    
796     my $set = $self->{set};
797    
798 root 1.8 # delete all static libraries, also windows stuff
799 root 1.12 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
800 root 1.3
801     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
802 root 1.12 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
803 root 1.3
804     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
805 root 1.12 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
806 root 1.11
807     # delete unneeded unicore files
808 root 1.12 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
809 root 1.3 }
810 root 1.1
811     =back
812    
813 root 1.5 =head2 RESULT SET
814    
815     =over 4
816 root 1.1
817 root 1.5 =item $set = $extractor->set
818 root 1.1
819 root 1.5 Returns a hash reference that represents the result set. The hash is the
820     actual internal storage hash and can only be modified as described below.
821 root 1.1
822 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
823     e.g.:
824 root 1.1
825 root 1.5 bin/perl
826     lib/unicore/lib/Blk/Superscr.pl
827     lib/AnyEvent/Impl/EV.pm
828    
829     The value is an array reference with mostly unspecified contents, except
830     the first element, which is the file system path where the actual file can
831     be found.
832    
833     This code snippet lists all files inside the set:
834    
835     print "$_\n"
836     for sort keys %{ $extractor->set });
837    
838     This code fragment prints C<< filesystem_path => set_path >> pairs for all
839     files in the set:
840    
841     my $set = $extractor->set;
842     while (my ($set,$fspath) = each %$set) {
843     print "$fspath => $set\n";
844     }
845 root 1.1
846 root 1.5 You can implement your own filtering by asking for the result set with
847     C<< $extractor->set >>, and then deleting keys from the referenced hash
848     - since you can ask for the result set at any time you can add things,
849     filter them out this way, and add additional things.
850    
851     =back
852    
853     =cut
854    
855     sub set {
856 root 1.6 $_[0]->_trace_flush;
857 root 1.5 $_[0]{set}
858     }
859    
860     =head1 EXAMPLE
861    
862     To package he deliantra client (L<Deliantra::Client>), finding all
863     (perl) files needed to run it is a first step. This can be done by using
864     something like the following code snippet:
865    
866 root 1.12 my $ex = new Perl::LibExtractor;
867 root 1.5
868     $ex->add_perl;
869     $ex->add_core_support;
870 root 1.12 $ex->add_bin ("deliantra");
871 root 1.5 $ex->add_mod ("AnyEvent::Impl::EV");
872     $ex->add_mod ("AnyEvent::Impl::Perl");
873     $ex->add_mod ("Urlader");
874     $ex->filter ("-/*/auto/POSIX/**.al");
875     $ex->runtime_only;
876    
877     First it sets the perl library directory to F<pm> and F<.> (the latter
878     to work around some AutoLoader bugs), so perl uses only the perl library
879     files that came with the binary package.
880    
881     Then it sets some environment variable to override the system default
882     (which might be incompatible).
883    
884     Then it runs the client itself, using C<require>. Since C<require> only
885     looks in the perl library directory this is the reaosn why the scripts
886     were put there (of course, since F<.> is also included it doesn't matter,
887     but I refuse to yield to bugs).
888    
889     Finally it exits with a clean status to signal "ok" to Urlader.
890    
891     Back to the original C<Perl::LibExtractor> script: after initialising a
892     new set, the script simply adds the F<perl> interpreter and core support
893     files (just in case, not all are needed, but some are, and I am too lazy
894     to find out which ones exactly).
895    
896     Then it adds the deliantra executable itself, which in turn adds most of
897     the required modules. After that, the AnyEvent implementation modules are
898     added because these dependencies are not picked up automatically.
899    
900     The L<Urlader> module is added because the client itself does not depend
901     on it at all, but the wrapper does.
902    
903     At this point, all required files are present, and it's time to slim
904     down: most of the ueseless POSIX autoloaded functions are removed,
905     not because they are so big, but because creating files is a costly
906     operation in itself, so even small fiels have considerable overhead when
907     unpacking. Then files not required for running the client are removed.
908    
909     And that concludes it, the set is now ready.
910 root 1.1
911     =head1 SEE ALSO
912    
913     The utility program that comes with this module: L<perl-libextract>.
914    
915     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
916    
917 root 1.5 =head1 LICENSE
918    
919     This software package is licensed under the GPL version 3 or any later
920     version, see COPYING for details.
921    
922     This license does not, of course, apply to any output generated by this
923     software.
924    
925 root 1.1 =head1 AUTHOR
926    
927     Marc Lehmann <schmorp@schmorp.de>
928     http://home.schmorp.de/
929    
930     =cut
931    
932     1;
933