ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.8
Committed: Mon Jan 16 22:24:47 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.7: +37 -21 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72     The following key-value pairs exist, with default values as specified.
73    
74     =over 4
75    
76     =item exedir => "bin"
77    
78 root 1.5 The prefix to use for the suggested target path for perl executables
79     (scripts). Defaults to F<bin>.
80 root 1.1
81     =item libdir => "lib"
82    
83     The prefix to use for the suggested target path of perl library
84     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
85     etc.). Defaults to F<lib>.
86    
87     =item bindir => "bin"
88    
89     The prefix to use for the suggested target path for (non-perl)
90     executables. Defaults to F<bin>.
91    
92     =item dlldir => "bin"
93    
94     The prefix to use for the suggested target path of any shared
95     libraries. Defaults to F<bin>.
96    
97     =item inc => \@INC without "."
98    
99     An arrayref with paths to perl library directories. The default is
100     C<\@INC>, with F<.> removed.
101    
102     To prepend custom dirs just do this:
103    
104     inc => ["mydir", @INC],
105    
106 root 1.3 =item use_packlist => 1
107 root 1.1
108 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
109 root 1.5 then each time a file is traced, the complete distribution that contains
110     it is included (but not traced).
111    
112     If disabled, only shared objects and autoload files will be added.
113 root 1.1
114     =back
115    
116     =cut
117    
118     sub new {
119     my ($class, %kv) = @_;
120    
121     my $self = bless {
122     exedir => "bin",
123     libdir => "lib",
124     bindir => "bin",
125     dlldir => "bin",
126     inc => [grep $_ ne ".", @INC],
127 root 1.3 use_packlist => 1,
128 root 1.1 %kv,
129     set => {},
130     }, $class;
131    
132     my %inc_seen;
133 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
134 root 1.1 $self->{inc} = \@inc;
135    
136     $self->_set_inc;
137    
138     $self
139     }
140    
141 root 1.4 sub _perl_path() {
142     my $secure_perl_path = $Config{perlpath};
143    
144     if ($^O ne 'VMS') {
145     $secure_perl_path .= $Config{_exe}
146     unless $secure_perl_path =~ m/$Config{_exe}$/i;
147     }
148    
149     $secure_perl_path
150     }
151    
152 root 1.8 sub _path2match {
153     my $re = join "|", map "\Q$_", @_;
154    
155     $re = "(?:$re)\\/";
156     $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
157    
158     $case_tolerant
159     ? qr<$re>i
160     : qr<$re>
161     }
162    
163 root 1.1 sub _set_inc {
164     my ($self) = @_;
165    
166 root 1.8 my $matchprefix = _path2match @{ $self->{inc }};
167 root 1.2
168 root 1.1 my %lib;
169 root 1.2 my @packlists;
170 root 1.1
171     # find all files in all libdirs, earlier ones overwrite later ones
172 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
173    
174     while (@scan) {
175     my ($root, $dir) = @{ pop @scan };
176    
177     my $pfx = length $dir ? "$dir/" : "";
178    
179     for (do {
180     opendir my $fh, "$root/$dir"
181     or croak "$root/$dir: $!";
182     grep !/^\.\.?$/, readdir $fh
183     }) {
184     if (-d "$root/$dir/$_/.") {
185     $lib{"$pfx$_/"} = "$root/$pfx$_";
186     push @scan, [$root, "$pfx$_"];
187     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
188     push @packlists, [$root, $pfx];
189     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
190     # skip empty .bs files
191     # } elsif (/\.(?:pod|h|html)$/) {
192     # # not interested in those
193     } else {
194     #push @files, $_;
195     $lib{"$pfx$_"} = "$root/$pfx$_";
196 root 1.1 }
197 root 1.2 }
198    
199     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
200     }
201    
202     my %packlist;
203 root 1.1
204 root 1.2 # need to go forward here
205     for (@packlists) {
206     my ($root, $auto) = @$_;
207    
208     my @packlist;
209    
210     open my $fh, "<:perlio", "$root/$auto/.packlist"
211     or die "$root/$auto/.packlist: $!";
212    
213 root 1.8 $root = _path2match $root;
214 root 1.2
215     while (<$fh>) {
216     chomp;
217     s/ .*$//; # newer-style .packlists might contain key=value pairs
218    
219     s/$root// or next;
220 root 1.8 $_ = canonpath $_;
221 root 1.2 exists $lib{$_} or next;
222 root 1.1
223 root 1.2 push @packlist, $_;
224     $packlist{$_} = \@packlist;
225     }
226 root 1.1 }
227    
228 root 1.8 $self->{lib} = \%lib;
229     $self->{packlist} = \%packlist;
230 root 1.2 $self->{matchprefix} = $matchprefix;
231 root 1.1 }
232    
233 root 1.5 =back
234    
235     =head2 TRACE/PACKLIST BASED ADDING
236    
237     The following methods add various things to the set of files.
238    
239     Each time a perl file is added, it is scanned by tracing either loading,
240     execution or compiling it, and seeing which other perl modules and
241     libraries have been loaded.
242    
243     For each library file found this way, additional dependencies are added:
244     if packlists are enabled, then all files of the distribution that contains
245     the file will be added. If packlists are disabled, then only shared
246     objects and autoload files for modules will be added.
247    
248     Only files from perl library directories will be added automatically. Any
249     other files (such as manpages or scripts installed in the F<bin>
250     directory) are skipped.
251    
252     If there is an error, such as a module not being found, then this module
253     croaks (as opposed to silently skipping). If you want to add something of
254     which you are not sure it exists, then you can wrap the call into C<eval
255     {}>. In some cases, you can avoid this by executing the code you want
256     to work later using C<add_eval> - see C<add_core_support> for an actual
257     example of this technique.
258    
259     Note that packlists are meant to add files not covered by other
260     mechanisms, such as resource files and other data files loaded directly by
261     a module - they are not meant to add dependencies that are missed because
262     they only happen at runtime.
263    
264     For example, with packlists, when using L<AnyEvent>, then all event loop
265     backends are automatically added as well, but I<not> any event loops
266     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
267     packlists, only the backend that is being used is added (i.e. normally
268     none, as loading AnyEvent does not instantly load any backend).
269    
270     To catch the extra event loop dependencies, you can either initialise
271     AnyEvent so it picks a suitable backend:
272    
273     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
274    
275     Or you can directly load the backend modules you plan to use:
276    
277     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
278    
279     An example of a program (or module) that has extra resource files is
280     L<Deliantra::Client> - the normal tracing (without packlist usage) will
281     correctly add all submodules, but miss the fonts and textures. By using
282     the packlist, those files are added correctly.
283    
284     =over 4
285    
286     =cut
287    
288 root 1.1 sub _add {
289     my ($self, $add) = @_;
290    
291     my $lib = $self->{lib};
292     my $path;
293    
294     for (@$add) {
295     $path = "$self->{libdir}/$_";
296    
297     $self->{set}{$path} ||= do {
298     my @info;
299    
300     $info[I_SRC] = $lib->{$_}
301     or croak "$_: unable to locate file in perl library";
302    
303 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
304 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
305     for @{ $self->{packlist}{$_} };
306 root 1.3
307     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
308     # s/\.pm$//;
309     # s%/%::%g;
310     # my $pkg = "libextractor" . ++$self->{count};
311     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
312     # unless $self->{_add_do}{$_}++;
313     # }
314     #
315     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
316     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
317    
318 root 1.2 } elsif (/^(.*)\.pm$/) {
319 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
320     $auto =~ m%/([^/]+)/$% or die;
321     my $base = $1;
322    
323     if (exists $lib->{$auto}) {
324     # auto dir exists, scan it for cool stuff
325    
326     # 1. shared object, others are of no interest to us
327     my $so = "$auto$base.$Config{dlext}";
328     if (my $src = $lib->{$so}) {
329     $so = "$self->{libdir}/$so";
330     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
331     }
332    
333     # 2. autoloader/autosplit
334     my $ix = "${auto}autosplit.ix";
335     if (my $src = $lib->{$ix}) {
336     $ix = "$self->{libdir}/$ix";
337     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
338    
339     open my $fh, "<:perlio", $src
340     or croak "$src: $!";
341    
342     my $package;
343    
344     while (<$fh>) {
345     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
346     my $al = "auto/$package/$1.al";
347     my $src = $lib->{$al}
348     or croak "$al: autoload file not found, but should be there.";
349    
350     $al = "$self->{libdir}/$al";
351     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
352    
353     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
354     ($package = $1) =~ s/::/\//g;
355     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
356     # nop
357     } else {
358     warn "WARNING: $src: unparsable line, please report: $_";
359     }
360     }
361     }
362    
363     skip:
364     }
365     }
366    
367     \@info
368     };
369     }
370     }
371    
372     sub _trace {
373     my ($self, $file, $eval) = @_;
374    
375 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
376 root 1.1 }
377    
378     sub _trace_flush {
379     my ($self) = @_;
380    
381 root 1.3 # ->_add might add additional files to trace
382     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
383     my $tmpdir = newdir File::Temp;
384     my $dir = $tmpdir->dirname;
385    
386     open my $fh, ">:perlio", "$dir/eval"
387     or croak "$dir/eval: $!";
388     syswrite $fh,
389     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
390     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
391     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
392     . "CHECK {\n"
393     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
394     . 'print join "\x00", values %INC;'
395     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
396     . "}\n"
397     . (delete $self->{trace_check});
398     close $fh;
399    
400 root 1.4 system _perl_path, "-c", "$dir/eval"
401 root 1.3 and croak "trace failure, check trace process output - caught";
402 root 1.1
403 root 1.3 my @inc = split /\x00/, do {
404     open my $fh, "<:perlio", "$dir/out"
405     or croak "$dir/out: $!";
406     local $/;
407     scalar readline $fh
408     };
409 root 1.1
410 root 1.3 my $matchprefix = $self->{matchprefix};
411 root 1.1
412 root 1.3 # remove the library directory prefix, hope for the best
413     s/$matchprefix//
414     or croak "$_: file outside any library directory"
415     for @inc;
416 root 1.1
417 root 1.3 $self->_add (\@inc);
418     }
419 root 1.1 }
420    
421     =item $extractor->add_mod ($module[, $module...])
422    
423     Adds the given module(s) to the file set - the module name must be specified
424     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
425    
426     The program will be loaded with the default import list, any dependent
427     files, such as the shared object implementing xs functions, or autoload
428     files, will also be added.
429    
430 root 1.5 If you want to use a different import list (for those rare modules wghere
431     import lists trigger different backend modules to be loaded for example),
432     you can use C<add_eval> instead:
433    
434     $extractor->add_eval ("use Module qw(a b c)");
435    
436 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
437     from the distribution they are part of.
438    
439     $extractor->add_mod ("Coro", "AnyEvent::AIO");
440    
441     =cut
442    
443     sub add_mod {
444     my $self = shift;
445    
446 root 1.3 for (@_) {
447     my $pkg = "libextractor" . ++$self->{count};
448     $self->_trace ("use $_", "{ package $pkg; use $_ }")
449     unless $self->{add_mod}{$_}++;
450     }
451 root 1.1 }
452    
453 root 1.5 =item $extractor->add_script ($name[, $name...])
454 root 1.1
455     Adds the given (perl) program(s) to the file set, that is, a program
456     installed by some perl module, written in perl (an example would be the
457     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
458     distribution).
459    
460 root 1.5 Example: add the deliantra client program installed by the
461     L<Deliantra::Client> module.
462 root 1.1
463 root 1.5 $extractor->add_script ("deliantra");
464 root 1.1
465     =cut
466    
467 root 1.5 sub add_script {
468 root 1.1 my $self = shift;
469    
470     exe:
471     for my $exe (@_) {
472     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
473     if (open my $fh, "<:perlio", "$dir/$exe") {
474    
475     my $file = do { local $/; readline $fh };
476    
477     $self->_trace_flush if exists $self->{trace_check};
478     $self->{trace_check} = $file;
479    
480 root 1.7 $self->{set}{"$self->{bindir}/$exe"} = ["$dir/$exe"];
481 root 1.1 next exe;
482     }
483     }
484    
485 root 1.5 croak "add_script ($exe): executable not found";
486 root 1.1 }
487     }
488    
489     =item $extractor->add_eval ($string)
490    
491 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
492     by it. For example, this would add L<AnyEvent> and the default backend
493     implementation module and event loop module:
494 root 1.1
495     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
496    
497 root 1.5 Each code snippet will be executed in its own package and under C<use
498     strict>.
499    
500 root 1.1 =cut
501    
502     sub add_eval {
503     my ($self, $eval) = @_;
504    
505 root 1.5 my $pkg = "libextractor" . ++$self->{count};
506 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
507 root 1.3 $self->_trace ($eval,
508     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
509 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
510 root 1.3 );
511 root 1.1 }
512    
513 root 1.5 =back
514    
515     =head2 OTHER METHODS FOR ADDING FILES
516    
517     The following methods add commonly used files that are either not covered
518     by other methods or add commonly-used dependencies.
519    
520     =over 4
521    
522 root 1.1 =item $extractor->add_perl
523    
524     Adds the perl binary itself to the file set, including the libperl dll, if
525     needed.
526    
527 root 1.5 For example, on UNIX systems, this usually adds a F<bin/perl> and possibly
528     some F<lib/libperl.so.XXX>.
529    
530 root 1.4 =cut
531    
532     sub add_perl {
533     my ($self) = @_;
534    
535 root 1.7 $self->{set}{"$self->{exedir}/perl$Config{_exe}"} = [_perl_path];
536 root 1.4
537     # on debian, we have the special case of a perl binary linked against
538     # a static libperl.a (which is not available), but the Config says to use
539     # a shared library, which is in the wrong directory, too (which breaks
540     # every other perl installation on the system - they are so stupid).
541    
542     # that means we can't find the libperl.so, because dbeian actively breaks
543     # their perl install, and we don't need it. we work around this by silently
544     # not including the libperl if we cannot find it.
545    
546     if ($Config{useshrplib} eq "true") {
547 root 1.8 my ($libperl, $libpath);
548    
549     if ($^O eq "cygwin") {
550     $libperl = $Config{libperl};
551     $libpath = "$Config{binexp}/$libperl";
552     } elsif ($^O eq "MSWin32") {
553     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
554     $libpath = "$Config{binexp}/$libperl";
555     } else {
556     $libperl = $Config{libperl};
557     $libpath = $self->{lib}{"CORE/$libperl"};
558 root 1.4 }
559 root 1.8
560     $self->{set}{"$self->{dlldir}/$libperl"} = $libpath
561     if length $libpath && -e $libpath;
562 root 1.4 }
563     }
564 root 1.3
565     =item $extractor->add_core_support
566    
567     Try to add modules and files needed to support commonly-used builtin
568     language features. For example to open a scalar for I/O you need the
569     L<PerlIO::scalar> module:
570    
571     open $fh, "<", \$scalar
572    
573     A number of regex and string features (e.g. C<ucfirst>) need some unicore
574     files, e.g.:
575    
576     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
577    
578     This call adds these files (simply by executing code similar to the above
579     code fragments).
580    
581     Notable things that are missing are other PerlIO layers, such as
582     L<PerlIO::encoding>, and named character and character class matches.
583    
584     =cut
585    
586     sub add_core_support {
587     my ($self) = @_;
588    
589     $self->add_eval ('my $v; open my $fh, "<", \$v');
590     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
591     }
592 root 1.1
593 root 1.3 =item $extractor->add_unicore
594    
595 root 1.5 Adds (hopefully) all files form the unicore database that will ever be
596     needed.
597    
598     If you are not sure which unicode character classes and similar unicore
599     databases you need, and you do not care about an extra one thousand(!)
600     files comprising 4MB of data, then you can just call this method, which
601     adds basically all files from perl's unicode database.
602    
603     =cut
604    
605     sub add_unicore {
606     my ($self) = @_;
607    
608     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
609     }
610    
611     =back
612    
613     =head2 GLOB-BASED ADDING AND FILTERING
614 root 1.1
615 root 1.5 These methods add or manipulate files by using glob-based patterns.
616    
617     These glob patterns work similarly to glob patterns in the shell:
618    
619     =over 4
620    
621     =item /
622    
623     A F</> at the start of the pattern interprets the pattern as a file
624     path inside the file set, almost the same as in the shell. For example,
625     F</bin/perl*> would match all files whose names starting with F<perl>
626     inside the F<bin> directory in the set.
627    
628     If the F</> is missing, then the pattern is interpreted as a module name
629     (a F<.pm> file). For example, F<Coro> matches the file F<libdir/Coro.pm>
630     (where F<libdir> is the perl library directory), while F<Coro::*> would
631     match F<libdir/Coro/*.pm>.
632    
633     =item *
634    
635     A single star matches anything inside a single directory component. For
636     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
637     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
638 root 1.1
639 root 1.5 Another way to look at it is that a single star matches anything but a
640     slash (F</>).
641 root 1.1
642 root 1.5 =item **
643 root 1.1
644 root 1.5 A double star matches any number of characters in the path, including F</>.
645 root 1.1
646 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
647     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
648 root 1.1
649 root 1.5 =back
650 root 1.1
651 root 1.2 =cut
652    
653     sub _extglob2re {
654 root 1.5 for (quotemeta $_[1]) {
655 root 1.2 s/\\\*\\\*/.*/g;
656     s/\\\*/[^\/]*/g;
657     s/\\\?/[^\/]/g;
658    
659     unless (s%^\\/%%) {
660     s%\\:\\:%/%g;
661 root 1.5 $_ = (quotemeta $_[0]{libdir}) . "/$_\\.pm";
662 root 1.2 }
663    
664     $_ .= '$';
665     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
666    
667     return qr<^$_>s
668     }
669     }
670    
671 root 1.5 =over 4
672    
673     =item $extractor->add_glob ($modglob[, $modglob...])
674    
675     Adds all files from the perl library that match the given glob pattern.
676    
677     For example, you could implement C<add_unicore> yourself like this:
678    
679     $extractor->add_glob ("/unicore/**.pl");
680    
681     =cut
682    
683     sub add_glob {
684     my $self = shift;
685    
686     for (@_) {
687     my $pat = $self->_extglob2re ($_);
688     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
689     }
690     }
691    
692     =item $extractor->filter ($pattern[, $pattern...])
693    
694     Applies a series of include/exclude filters. Each filter must start with
695     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
696     pattern. The rest of the pattern is a normal glob pattern.
697    
698     An exclude pattern (C<->) instantly removes all matching files from
699     the set. An include pattern (C<+>) protects matching files from later
700     removals.
701    
702     That is, if you have an include pattern then all files that were matched
703     by it will be included in the set, regardless of any further exclude
704     patterns matching the same files.
705    
706     Likewise, any file excluded by a pattern will not be included in the set,
707     even if matched by later include patterns.
708    
709     Any files not matched by any expression will simply stay in the set.
710    
711     For example, to remove most of the useless autoload functions by the POSIX
712     module (they either do the same thing as a builtin or always raise an
713     error), you would use this (assuming a default C<libdir>):
714    
715     $extractor->filter ("-/lib/auto/POSIX/*.al");
716    
717     This does not remove all autoload files, only the ones not defined by a
718     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
719    
720     =cut
721    
722 root 1.2 sub filter {
723     my ($self, @patterns) = @_;
724    
725 root 1.3 $self->_trace_flush;
726    
727     my $set = $self->{set};
728     my %include;
729 root 1.2
730     for my $pat (@patterns) {
731     $pat =~ s/^([+\-])//
732     or croak "$_: not a valid filter pattern (missing + or - prefix)";
733     my $inc = $1 eq "+";
734     $pat = $self->_extglob2re ($pat);
735 root 1.3
736     my @match = grep /$pat/, keys %$set;
737    
738     if ($inc) {
739     @include{@match} = delete @$set{@match};
740     } else {
741     delete @$set{@{ $_->[I_DEP] }} # remove dependents
742     for delete @$set{@match};
743     }
744 root 1.2 }
745 root 1.3
746     my @include = keys %include;
747     @$set{@include} = delete @include{@include};
748 root 1.2 }
749 root 1.1
750 root 1.3 =item $extractor->runtime_only
751    
752     This removes all files that are not needed at runtime, such as static
753     archives, header and other files needed only for compilation of modules,
754     and pod and html files (which are unlikely to be needed at runtime).
755 root 1.1
756 root 1.8 This is quite useful when you want to have only files actually needed to
757 root 1.3 execute a program.
758    
759     =cut
760    
761     sub runtime_only {
762     my ($self) = @_;
763    
764     $self->_trace_flush;
765    
766     my $set = $self->{set};
767    
768 root 1.8 # delete all static libraries, also windows stuff
769     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
770 root 1.3
771     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
772     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
773    
774     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
775 root 1.8 delete @$set{ grep m%^\Q$self->{libdir}\E/.*\.(?:pod|h|html)$%s, keys %$set };
776 root 1.3 }
777 root 1.1
778     =back
779    
780 root 1.5 =head2 RESULT SET
781    
782     =over 4
783 root 1.1
784 root 1.5 =item $set = $extractor->set
785 root 1.1
786 root 1.5 Returns a hash reference that represents the result set. The hash is the
787     actual internal storage hash and can only be modified as described below.
788 root 1.1
789 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
790     e.g.:
791 root 1.1
792 root 1.5 bin/perl
793     lib/unicore/lib/Blk/Superscr.pl
794     lib/AnyEvent/Impl/EV.pm
795    
796     The value is an array reference with mostly unspecified contents, except
797     the first element, which is the file system path where the actual file can
798     be found.
799    
800     This code snippet lists all files inside the set:
801    
802     print "$_\n"
803     for sort keys %{ $extractor->set });
804    
805     This code fragment prints C<< filesystem_path => set_path >> pairs for all
806     files in the set:
807    
808     my $set = $extractor->set;
809     while (my ($set,$fspath) = each %$set) {
810     print "$fspath => $set\n";
811     }
812 root 1.1
813 root 1.5 You can implement your own filtering by asking for the result set with
814     C<< $extractor->set >>, and then deleting keys from the referenced hash
815     - since you can ask for the result set at any time you can add things,
816     filter them out this way, and add additional things.
817    
818     =back
819    
820     =cut
821    
822     sub set {
823 root 1.6 $_[0]->_trace_flush;
824 root 1.5 $_[0]{set}
825     }
826    
827     =head1 EXAMPLE
828    
829     To package he deliantra client (L<Deliantra::Client>), finding all
830     (perl) files needed to run it is a first step. This can be done by using
831     something like the following code snippet:
832    
833     my $ex = new Perl::LibExtractor
834     exedir => ".", dlldir => ".",
835     libdir => "pm", bindir => "pm/bin";
836    
837     $ex->add_perl;
838     $ex->add_core_support;
839     $ex->add_script ("deliantra");
840     $ex->add_mod ("AnyEvent::Impl::EV");
841     $ex->add_mod ("AnyEvent::Impl::Perl");
842     $ex->add_mod ("Urlader");
843     $ex->filter ("-/*/auto/POSIX/**.al");
844     $ex->runtime_only;
845    
846     Let's first find out about the choice of paths for the subset. The
847     Deliantra client binary packages use L<Urlader> nowadays, and there it is
848     convenient to have F<perl> and any shared libraries directly in the root
849     of the distribution.
850    
851     The perl library files are put into a directory named F<pm>, simply
852     because it's shorter than F<lib>, and in the future, some files might go
853     into F<lib>.
854    
855     And finally, the F<deliantra> script itself is put into the perl library
856     directory, because it is not run directly - the installed client uses the
857     system fonts and other resources, while the binary package is supposed
858     to use the files packaged with it. To achieve this, a wrapper script is
859     created, called F<run>; which displays a splash screen and configures the
860     environment. A simplified version of it could look like this:
861    
862     @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
863     $ENV{PANGO_RC_FILE} = "pango.rc";
864     require "bin/deliantra";
865     exit 0;
866    
867     First it sets the perl library directory to F<pm> and F<.> (the latter
868     to work around some AutoLoader bugs), so perl uses only the perl library
869     files that came with the binary package.
870    
871     Then it sets some environment variable to override the system default
872     (which might be incompatible).
873    
874     Then it runs the client itself, using C<require>. Since C<require> only
875     looks in the perl library directory this is the reaosn why the scripts
876     were put there (of course, since F<.> is also included it doesn't matter,
877     but I refuse to yield to bugs).
878    
879     Finally it exits with a clean status to signal "ok" to Urlader.
880    
881     Back to the original C<Perl::LibExtractor> script: after initialising a
882     new set, the script simply adds the F<perl> interpreter and core support
883     files (just in case, not all are needed, but some are, and I am too lazy
884     to find out which ones exactly).
885    
886     Then it adds the deliantra executable itself, which in turn adds most of
887     the required modules. After that, the AnyEvent implementation modules are
888     added because these dependencies are not picked up automatically.
889    
890     The L<Urlader> module is added because the client itself does not depend
891     on it at all, but the wrapper does.
892    
893     At this point, all required files are present, and it's time to slim
894     down: most of the ueseless POSIX autoloaded functions are removed,
895     not because they are so big, but because creating files is a costly
896     operation in itself, so even small fiels have considerable overhead when
897     unpacking. Then files not required for running the client are removed.
898    
899     And that concludes it, the set is now ready.
900 root 1.1
901     =head1 SEE ALSO
902    
903     The utility program that comes with this module: L<perl-libextract>.
904    
905     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
906    
907 root 1.5 =head1 LICENSE
908    
909     This software package is licensed under the GPL version 3 or any later
910     version, see COPYING for details.
911    
912     This license does not, of course, apply to any output generated by this
913     software.
914    
915 root 1.1 =head1 AUTHOR
916    
917     Marc Lehmann <schmorp@schmorp.de>
918     http://home.schmorp.de/
919    
920     =cut
921    
922     1;
923