ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.7
Committed: Mon Jan 16 18:46:05 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.6: +20 -3 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.7 my $canonpath = File::Spec->can ("canonpath");
53    
54     sub canonpath($) {
55     local $_ = $canonpath->(File::Spec::, $_[0]);
56     s%\\%/%g;
57     $_
58     }
59    
60 root 1.5 =head2 CREATION
61    
62     =over 4
63    
64 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
65    
66     Creates a new extractor object. Each extractor object stores some
67     configuration options and a subset of files that can be queried at any
68     time,.
69    
70     The following key-value pairs exist, with default values as specified.
71    
72     =over 4
73    
74     =item exedir => "bin"
75    
76 root 1.5 The prefix to use for the suggested target path for perl executables
77     (scripts). Defaults to F<bin>.
78 root 1.1
79     =item libdir => "lib"
80    
81     The prefix to use for the suggested target path of perl library
82     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
83     etc.). Defaults to F<lib>.
84    
85     =item bindir => "bin"
86    
87     The prefix to use for the suggested target path for (non-perl)
88     executables. Defaults to F<bin>.
89    
90     =item dlldir => "bin"
91    
92     The prefix to use for the suggested target path of any shared
93     libraries. Defaults to F<bin>.
94    
95     =item inc => \@INC without "."
96    
97     An arrayref with paths to perl library directories. The default is
98     C<\@INC>, with F<.> removed.
99    
100     To prepend custom dirs just do this:
101    
102     inc => ["mydir", @INC],
103    
104 root 1.3 =item use_packlist => 1
105 root 1.1
106 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
107 root 1.5 then each time a file is traced, the complete distribution that contains
108     it is included (but not traced).
109    
110     If disabled, only shared objects and autoload files will be added.
111 root 1.1
112     =back
113    
114     =cut
115    
116     sub new {
117     my ($class, %kv) = @_;
118    
119     my $self = bless {
120     exedir => "bin",
121     libdir => "lib",
122     bindir => "bin",
123     dlldir => "bin",
124     inc => [grep $_ ne ".", @INC],
125 root 1.3 use_packlist => 1,
126 root 1.1 %kv,
127     set => {},
128     }, $class;
129    
130     my %inc_seen;
131 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
132 root 1.1 $self->{inc} = \@inc;
133    
134     $self->_set_inc;
135    
136     $self
137     }
138    
139 root 1.4 sub _perl_path() {
140     my $secure_perl_path = $Config{perlpath};
141    
142     if ($^O ne 'VMS') {
143     $secure_perl_path .= $Config{_exe}
144     unless $secure_perl_path =~ m/$Config{_exe}$/i;
145     }
146    
147     $secure_perl_path
148     }
149    
150 root 1.1 sub _set_inc {
151     my ($self) = @_;
152    
153 root 1.7 $self->{inc} = [ map canonpath $_, @{ $self->{inc }} ];
154    
155 root 1.2 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
156 root 1.7
157     $matchprefix =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
158     $matchprefix = qr<$matchprefix>i if File::Spec->case_tolerant;
159    
160 root 1.2 $matchprefix = qr<^(?:$matchprefix)/>;
161    
162 root 1.1 my %lib;
163 root 1.2 my @packlists;
164 root 1.1
165     # find all files in all libdirs, earlier ones overwrite later ones
166 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
167    
168     while (@scan) {
169     my ($root, $dir) = @{ pop @scan };
170    
171     my $pfx = length $dir ? "$dir/" : "";
172    
173     for (do {
174     opendir my $fh, "$root/$dir"
175     or croak "$root/$dir: $!";
176     grep !/^\.\.?$/, readdir $fh
177     }) {
178     if (-d "$root/$dir/$_/.") {
179     $lib{"$pfx$_/"} = "$root/$pfx$_";
180     push @scan, [$root, "$pfx$_"];
181     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
182     push @packlists, [$root, $pfx];
183     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
184     # skip empty .bs files
185     # } elsif (/\.(?:pod|h|html)$/) {
186     # # not interested in those
187     } else {
188     #push @files, $_;
189     $lib{"$pfx$_"} = "$root/$pfx$_";
190 root 1.1 }
191 root 1.2 }
192    
193     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
194     }
195    
196     my %packlist;
197 root 1.1
198 root 1.2 # need to go forward here
199     for (@packlists) {
200     my ($root, $auto) = @$_;
201    
202     my @packlist;
203    
204     open my $fh, "<:perlio", "$root/$auto/.packlist"
205     or die "$root/$auto/.packlist: $!";
206    
207     $root = qr<^\Q$root/>;
208    
209     while (<$fh>) {
210     chomp;
211     s/ .*$//; # newer-style .packlists might contain key=value pairs
212    
213     s/$root// or next;
214     exists $lib{$_} or next;
215 root 1.1
216 root 1.2 push @packlist, $_;
217     $packlist{$_} = \@packlist;
218     }
219 root 1.1 }
220    
221     $self->{lib} = \%lib;
222 root 1.2 $self->{packlist} = \%packlist;
223     $self->{matchprefix} = $matchprefix;
224 root 1.1 }
225    
226 root 1.5 =back
227    
228     =head2 TRACE/PACKLIST BASED ADDING
229    
230     The following methods add various things to the set of files.
231    
232     Each time a perl file is added, it is scanned by tracing either loading,
233     execution or compiling it, and seeing which other perl modules and
234     libraries have been loaded.
235    
236     For each library file found this way, additional dependencies are added:
237     if packlists are enabled, then all files of the distribution that contains
238     the file will be added. If packlists are disabled, then only shared
239     objects and autoload files for modules will be added.
240    
241     Only files from perl library directories will be added automatically. Any
242     other files (such as manpages or scripts installed in the F<bin>
243     directory) are skipped.
244    
245     If there is an error, such as a module not being found, then this module
246     croaks (as opposed to silently skipping). If you want to add something of
247     which you are not sure it exists, then you can wrap the call into C<eval
248     {}>. In some cases, you can avoid this by executing the code you want
249     to work later using C<add_eval> - see C<add_core_support> for an actual
250     example of this technique.
251    
252     Note that packlists are meant to add files not covered by other
253     mechanisms, such as resource files and other data files loaded directly by
254     a module - they are not meant to add dependencies that are missed because
255     they only happen at runtime.
256    
257     For example, with packlists, when using L<AnyEvent>, then all event loop
258     backends are automatically added as well, but I<not> any event loops
259     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
260     packlists, only the backend that is being used is added (i.e. normally
261     none, as loading AnyEvent does not instantly load any backend).
262    
263     To catch the extra event loop dependencies, you can either initialise
264     AnyEvent so it picks a suitable backend:
265    
266     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
267    
268     Or you can directly load the backend modules you plan to use:
269    
270     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
271    
272     An example of a program (or module) that has extra resource files is
273     L<Deliantra::Client> - the normal tracing (without packlist usage) will
274     correctly add all submodules, but miss the fonts and textures. By using
275     the packlist, those files are added correctly.
276    
277     =over 4
278    
279     =cut
280    
281 root 1.1 sub _add {
282     my ($self, $add) = @_;
283    
284     my $lib = $self->{lib};
285     my $path;
286    
287     for (@$add) {
288     $path = "$self->{libdir}/$_";
289    
290     $self->{set}{$path} ||= do {
291     my @info;
292    
293     $info[I_SRC] = $lib->{$_}
294     or croak "$_: unable to locate file in perl library";
295    
296 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
297 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
298     for @{ $self->{packlist}{$_} };
299 root 1.3
300     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
301     # s/\.pm$//;
302     # s%/%::%g;
303     # my $pkg = "libextractor" . ++$self->{count};
304     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
305     # unless $self->{_add_do}{$_}++;
306     # }
307     #
308     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
309     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
310    
311 root 1.2 } elsif (/^(.*)\.pm$/) {
312 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
313     $auto =~ m%/([^/]+)/$% or die;
314     my $base = $1;
315    
316     if (exists $lib->{$auto}) {
317     # auto dir exists, scan it for cool stuff
318    
319     # 1. shared object, others are of no interest to us
320     my $so = "$auto$base.$Config{dlext}";
321     if (my $src = $lib->{$so}) {
322     $so = "$self->{libdir}/$so";
323     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
324     }
325    
326     # 2. autoloader/autosplit
327     my $ix = "${auto}autosplit.ix";
328     if (my $src = $lib->{$ix}) {
329     $ix = "$self->{libdir}/$ix";
330     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
331    
332     open my $fh, "<:perlio", $src
333     or croak "$src: $!";
334    
335     my $package;
336    
337     while (<$fh>) {
338     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
339     my $al = "auto/$package/$1.al";
340     my $src = $lib->{$al}
341     or croak "$al: autoload file not found, but should be there.";
342    
343     $al = "$self->{libdir}/$al";
344     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
345    
346     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
347     ($package = $1) =~ s/::/\//g;
348     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
349     # nop
350     } else {
351     warn "WARNING: $src: unparsable line, please report: $_";
352     }
353     }
354     }
355    
356     skip:
357     }
358     }
359    
360     \@info
361     };
362     }
363     }
364    
365     sub _trace {
366     my ($self, $file, $eval) = @_;
367    
368 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
369 root 1.1 }
370    
371     sub _trace_flush {
372     my ($self) = @_;
373    
374 root 1.3 # ->_add might add additional files to trace
375     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
376     my $tmpdir = newdir File::Temp;
377     my $dir = $tmpdir->dirname;
378    
379     open my $fh, ">:perlio", "$dir/eval"
380     or croak "$dir/eval: $!";
381     syswrite $fh,
382     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
383     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
384     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
385     . "CHECK {\n"
386     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
387     . 'print join "\x00", values %INC;'
388     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
389     . "}\n"
390     . (delete $self->{trace_check});
391     close $fh;
392    
393 root 1.4 system _perl_path, "-c", "$dir/eval"
394 root 1.3 and croak "trace failure, check trace process output - caught";
395 root 1.1
396 root 1.3 my @inc = split /\x00/, do {
397     open my $fh, "<:perlio", "$dir/out"
398     or croak "$dir/out: $!";
399     local $/;
400     scalar readline $fh
401     };
402 root 1.1
403 root 1.7 $_ = canonpath $_
404     for @inc;
405    
406 root 1.3 my $matchprefix = $self->{matchprefix};
407 root 1.1
408 root 1.3 # remove the library directory prefix, hope for the best
409     s/$matchprefix//
410     or croak "$_: file outside any library directory"
411     for @inc;
412 root 1.1
413 root 1.3 $self->_add (\@inc);
414     }
415 root 1.1 }
416    
417     =item $extractor->add_mod ($module[, $module...])
418    
419     Adds the given module(s) to the file set - the module name must be specified
420     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
421    
422     The program will be loaded with the default import list, any dependent
423     files, such as the shared object implementing xs functions, or autoload
424     files, will also be added.
425    
426 root 1.5 If you want to use a different import list (for those rare modules wghere
427     import lists trigger different backend modules to be loaded for example),
428     you can use C<add_eval> instead:
429    
430     $extractor->add_eval ("use Module qw(a b c)");
431    
432 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
433     from the distribution they are part of.
434    
435     $extractor->add_mod ("Coro", "AnyEvent::AIO");
436    
437     =cut
438    
439     sub add_mod {
440     my $self = shift;
441    
442 root 1.3 for (@_) {
443     my $pkg = "libextractor" . ++$self->{count};
444     $self->_trace ("use $_", "{ package $pkg; use $_ }")
445     unless $self->{add_mod}{$_}++;
446     }
447 root 1.1 }
448    
449 root 1.5 =item $extractor->add_script ($name[, $name...])
450 root 1.1
451     Adds the given (perl) program(s) to the file set, that is, a program
452     installed by some perl module, written in perl (an example would be the
453     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
454     distribution).
455    
456 root 1.5 Example: add the deliantra client program installed by the
457     L<Deliantra::Client> module.
458 root 1.1
459 root 1.5 $extractor->add_script ("deliantra");
460 root 1.1
461     =cut
462    
463 root 1.5 sub add_script {
464 root 1.1 my $self = shift;
465    
466     exe:
467     for my $exe (@_) {
468     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
469     if (open my $fh, "<:perlio", "$dir/$exe") {
470    
471     my $file = do { local $/; readline $fh };
472    
473     $self->_trace_flush if exists $self->{trace_check};
474     $self->{trace_check} = $file;
475    
476 root 1.7 $self->{set}{"$self->{bindir}/$exe"} = ["$dir/$exe"];
477 root 1.1 next exe;
478     }
479     }
480    
481 root 1.5 croak "add_script ($exe): executable not found";
482 root 1.1 }
483     }
484    
485     =item $extractor->add_eval ($string)
486    
487 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
488     by it. For example, this would add L<AnyEvent> and the default backend
489     implementation module and event loop module:
490 root 1.1
491     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
492    
493 root 1.5 Each code snippet will be executed in its own package and under C<use
494     strict>.
495    
496 root 1.1 =cut
497    
498     sub add_eval {
499     my ($self, $eval) = @_;
500    
501 root 1.5 my $pkg = "libextractor" . ++$self->{count};
502 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
503 root 1.3 $self->_trace ($eval,
504     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
505 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
506 root 1.3 );
507 root 1.1 }
508    
509 root 1.5 =back
510    
511     =head2 OTHER METHODS FOR ADDING FILES
512    
513     The following methods add commonly used files that are either not covered
514     by other methods or add commonly-used dependencies.
515    
516     =over 4
517    
518 root 1.1 =item $extractor->add_perl
519    
520     Adds the perl binary itself to the file set, including the libperl dll, if
521     needed.
522    
523 root 1.5 For example, on UNIX systems, this usually adds a F<bin/perl> and possibly
524     some F<lib/libperl.so.XXX>.
525    
526 root 1.4 =cut
527    
528     sub add_perl {
529     my ($self) = @_;
530    
531 root 1.7 $self->{set}{"$self->{exedir}/perl$Config{_exe}"} = [_perl_path];
532 root 1.4
533     # on debian, we have the special case of a perl binary linked against
534     # a static libperl.a (which is not available), but the Config says to use
535     # a shared library, which is in the wrong directory, too (which breaks
536     # every other perl installation on the system - they are so stupid).
537    
538     # that means we can't find the libperl.so, because dbeian actively breaks
539     # their perl install, and we don't need it. we work around this by silently
540     # not including the libperl if we cannot find it.
541    
542     if ($Config{useshrplib} eq "true") {
543     if (my $libperl = $self->{lib}{"CORE/$Config{libperl}"}) {
544     $self->{set}{"$self->{dlldir}/$Config{libperl}"} = $libperl;
545     }
546     }
547     }
548 root 1.3
549     =item $extractor->add_core_support
550    
551     Try to add modules and files needed to support commonly-used builtin
552     language features. For example to open a scalar for I/O you need the
553     L<PerlIO::scalar> module:
554    
555     open $fh, "<", \$scalar
556    
557     A number of regex and string features (e.g. C<ucfirst>) need some unicore
558     files, e.g.:
559    
560     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
561    
562     This call adds these files (simply by executing code similar to the above
563     code fragments).
564    
565     Notable things that are missing are other PerlIO layers, such as
566     L<PerlIO::encoding>, and named character and character class matches.
567    
568     =cut
569    
570     sub add_core_support {
571     my ($self) = @_;
572    
573     $self->add_eval ('my $v; open my $fh, "<", \$v');
574     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
575     }
576 root 1.1
577 root 1.3 =item $extractor->add_unicore
578    
579 root 1.5 Adds (hopefully) all files form the unicore database that will ever be
580     needed.
581    
582     If you are not sure which unicode character classes and similar unicore
583     databases you need, and you do not care about an extra one thousand(!)
584     files comprising 4MB of data, then you can just call this method, which
585     adds basically all files from perl's unicode database.
586    
587     =cut
588    
589     sub add_unicore {
590     my ($self) = @_;
591    
592     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
593     }
594    
595     =back
596    
597     =head2 GLOB-BASED ADDING AND FILTERING
598 root 1.1
599 root 1.5 These methods add or manipulate files by using glob-based patterns.
600    
601     These glob patterns work similarly to glob patterns in the shell:
602    
603     =over 4
604    
605     =item /
606    
607     A F</> at the start of the pattern interprets the pattern as a file
608     path inside the file set, almost the same as in the shell. For example,
609     F</bin/perl*> would match all files whose names starting with F<perl>
610     inside the F<bin> directory in the set.
611    
612     If the F</> is missing, then the pattern is interpreted as a module name
613     (a F<.pm> file). For example, F<Coro> matches the file F<libdir/Coro.pm>
614     (where F<libdir> is the perl library directory), while F<Coro::*> would
615     match F<libdir/Coro/*.pm>.
616    
617     =item *
618    
619     A single star matches anything inside a single directory component. For
620     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
621     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
622 root 1.1
623 root 1.5 Another way to look at it is that a single star matches anything but a
624     slash (F</>).
625 root 1.1
626 root 1.5 =item **
627 root 1.1
628 root 1.5 A double star matches any number of characters in the path, including F</>.
629 root 1.1
630 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
631     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
632 root 1.1
633 root 1.5 =back
634 root 1.1
635 root 1.2 =cut
636    
637     sub _extglob2re {
638 root 1.5 for (quotemeta $_[1]) {
639 root 1.2 s/\\\*\\\*/.*/g;
640     s/\\\*/[^\/]*/g;
641     s/\\\?/[^\/]/g;
642    
643     unless (s%^\\/%%) {
644     s%\\:\\:%/%g;
645 root 1.5 $_ = (quotemeta $_[0]{libdir}) . "/$_\\.pm";
646 root 1.2 }
647    
648     $_ .= '$';
649     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
650    
651     return qr<^$_>s
652     }
653     }
654    
655 root 1.5 =over 4
656    
657     =item $extractor->add_glob ($modglob[, $modglob...])
658    
659     Adds all files from the perl library that match the given glob pattern.
660    
661     For example, you could implement C<add_unicore> yourself like this:
662    
663     $extractor->add_glob ("/unicore/**.pl");
664    
665     =cut
666    
667     sub add_glob {
668     my $self = shift;
669    
670     for (@_) {
671     my $pat = $self->_extglob2re ($_);
672     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
673     }
674     }
675    
676     =item $extractor->filter ($pattern[, $pattern...])
677    
678     Applies a series of include/exclude filters. Each filter must start with
679     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
680     pattern. The rest of the pattern is a normal glob pattern.
681    
682     An exclude pattern (C<->) instantly removes all matching files from
683     the set. An include pattern (C<+>) protects matching files from later
684     removals.
685    
686     That is, if you have an include pattern then all files that were matched
687     by it will be included in the set, regardless of any further exclude
688     patterns matching the same files.
689    
690     Likewise, any file excluded by a pattern will not be included in the set,
691     even if matched by later include patterns.
692    
693     Any files not matched by any expression will simply stay in the set.
694    
695     For example, to remove most of the useless autoload functions by the POSIX
696     module (they either do the same thing as a builtin or always raise an
697     error), you would use this (assuming a default C<libdir>):
698    
699     $extractor->filter ("-/lib/auto/POSIX/*.al");
700    
701     This does not remove all autoload files, only the ones not defined by a
702     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
703    
704     =cut
705    
706 root 1.2 sub filter {
707     my ($self, @patterns) = @_;
708    
709 root 1.3 $self->_trace_flush;
710    
711     my $set = $self->{set};
712     my %include;
713 root 1.2
714     for my $pat (@patterns) {
715     $pat =~ s/^([+\-])//
716     or croak "$_: not a valid filter pattern (missing + or - prefix)";
717     my $inc = $1 eq "+";
718     $pat = $self->_extglob2re ($pat);
719 root 1.3
720     my @match = grep /$pat/, keys %$set;
721    
722     if ($inc) {
723     @include{@match} = delete @$set{@match};
724     } else {
725     delete @$set{@{ $_->[I_DEP] }} # remove dependents
726     for delete @$set{@match};
727     }
728 root 1.2 }
729 root 1.3
730     my @include = keys %include;
731     @$set{@include} = delete @include{@include};
732 root 1.2 }
733 root 1.1
734 root 1.3 =item $extractor->runtime_only
735    
736     This removes all files that are not needed at runtime, such as static
737     archives, header and other files needed only for compilation of modules,
738     and pod and html files (which are unlikely to be needed at runtime).
739 root 1.1
740 root 1.3 This is quite useful when you want to have only fiels actually needed to
741     execute a program.
742    
743     =cut
744    
745     sub runtime_only {
746     my ($self) = @_;
747    
748     $self->_trace_flush;
749    
750     my $set = $self->{set};
751    
752     # delete all static libraries
753     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set };
754    
755     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
756     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
757    
758     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
759     delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set };
760     }
761 root 1.1
762     =back
763    
764 root 1.5 =head2 RESULT SET
765    
766     =over 4
767 root 1.1
768 root 1.5 =item $set = $extractor->set
769 root 1.1
770 root 1.5 Returns a hash reference that represents the result set. The hash is the
771     actual internal storage hash and can only be modified as described below.
772 root 1.1
773 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
774     e.g.:
775 root 1.1
776 root 1.5 bin/perl
777     lib/unicore/lib/Blk/Superscr.pl
778     lib/AnyEvent/Impl/EV.pm
779    
780     The value is an array reference with mostly unspecified contents, except
781     the first element, which is the file system path where the actual file can
782     be found.
783    
784     This code snippet lists all files inside the set:
785    
786     print "$_\n"
787     for sort keys %{ $extractor->set });
788    
789     This code fragment prints C<< filesystem_path => set_path >> pairs for all
790     files in the set:
791    
792     my $set = $extractor->set;
793     while (my ($set,$fspath) = each %$set) {
794     print "$fspath => $set\n";
795     }
796 root 1.1
797 root 1.5 You can implement your own filtering by asking for the result set with
798     C<< $extractor->set >>, and then deleting keys from the referenced hash
799     - since you can ask for the result set at any time you can add things,
800     filter them out this way, and add additional things.
801    
802     =back
803    
804     =cut
805    
806     sub set {
807 root 1.6 $_[0]->_trace_flush;
808 root 1.5 $_[0]{set}
809     }
810    
811     =head1 EXAMPLE
812    
813     To package he deliantra client (L<Deliantra::Client>), finding all
814     (perl) files needed to run it is a first step. This can be done by using
815     something like the following code snippet:
816    
817     my $ex = new Perl::LibExtractor
818     exedir => ".", dlldir => ".",
819     libdir => "pm", bindir => "pm/bin";
820    
821     $ex->add_perl;
822     $ex->add_core_support;
823     $ex->add_script ("deliantra");
824     $ex->add_mod ("AnyEvent::Impl::EV");
825     $ex->add_mod ("AnyEvent::Impl::Perl");
826     $ex->add_mod ("Urlader");
827     $ex->filter ("-/*/auto/POSIX/**.al");
828     $ex->runtime_only;
829    
830     Let's first find out about the choice of paths for the subset. The
831     Deliantra client binary packages use L<Urlader> nowadays, and there it is
832     convenient to have F<perl> and any shared libraries directly in the root
833     of the distribution.
834    
835     The perl library files are put into a directory named F<pm>, simply
836     because it's shorter than F<lib>, and in the future, some files might go
837     into F<lib>.
838    
839     And finally, the F<deliantra> script itself is put into the perl library
840     directory, because it is not run directly - the installed client uses the
841     system fonts and other resources, while the binary package is supposed
842     to use the files packaged with it. To achieve this, a wrapper script is
843     created, called F<run>; which displays a splash screen and configures the
844     environment. A simplified version of it could look like this:
845    
846     @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
847     $ENV{PANGO_RC_FILE} = "pango.rc";
848     require "bin/deliantra";
849     exit 0;
850    
851     First it sets the perl library directory to F<pm> and F<.> (the latter
852     to work around some AutoLoader bugs), so perl uses only the perl library
853     files that came with the binary package.
854    
855     Then it sets some environment variable to override the system default
856     (which might be incompatible).
857    
858     Then it runs the client itself, using C<require>. Since C<require> only
859     looks in the perl library directory this is the reaosn why the scripts
860     were put there (of course, since F<.> is also included it doesn't matter,
861     but I refuse to yield to bugs).
862    
863     Finally it exits with a clean status to signal "ok" to Urlader.
864    
865     Back to the original C<Perl::LibExtractor> script: after initialising a
866     new set, the script simply adds the F<perl> interpreter and core support
867     files (just in case, not all are needed, but some are, and I am too lazy
868     to find out which ones exactly).
869    
870     Then it adds the deliantra executable itself, which in turn adds most of
871     the required modules. After that, the AnyEvent implementation modules are
872     added because these dependencies are not picked up automatically.
873    
874     The L<Urlader> module is added because the client itself does not depend
875     on it at all, but the wrapper does.
876    
877     At this point, all required files are present, and it's time to slim
878     down: most of the ueseless POSIX autoloaded functions are removed,
879     not because they are so big, but because creating files is a costly
880     operation in itself, so even small fiels have considerable overhead when
881     unpacking. Then files not required for running the client are removed.
882    
883     And that concludes it, the set is now ready.
884 root 1.1
885     =head1 SEE ALSO
886    
887     The utility program that comes with this module: L<perl-libextract>.
888    
889     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
890    
891 root 1.5 =head1 LICENSE
892    
893     This software package is licensed under the GPL version 3 or any later
894     version, see COPYING for details.
895    
896     This license does not, of course, apply to any output generated by this
897     software.
898    
899 root 1.1 =head1 AUTHOR
900    
901     Marc Lehmann <schmorp@schmorp.de>
902     http://home.schmorp.de/
903    
904     =cut
905    
906     1;
907