ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.10
Committed: Tue Jan 17 17:44:09 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.9: +2 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72     The following key-value pairs exist, with default values as specified.
73    
74     =over 4
75    
76     =item exedir => "bin"
77    
78 root 1.5 The prefix to use for the suggested target path for perl executables
79     (scripts). Defaults to F<bin>.
80 root 1.1
81     =item libdir => "lib"
82    
83     The prefix to use for the suggested target path of perl library
84     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
85     etc.). Defaults to F<lib>.
86    
87     =item bindir => "bin"
88    
89     The prefix to use for the suggested target path for (non-perl)
90     executables. Defaults to F<bin>.
91    
92     =item dlldir => "bin"
93    
94     The prefix to use for the suggested target path of any shared
95     libraries. Defaults to F<bin>.
96    
97     =item inc => \@INC without "."
98    
99     An arrayref with paths to perl library directories. The default is
100     C<\@INC>, with F<.> removed.
101    
102     To prepend custom dirs just do this:
103    
104     inc => ["mydir", @INC],
105    
106 root 1.3 =item use_packlist => 1
107 root 1.1
108 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
109 root 1.5 then each time a file is traced, the complete distribution that contains
110     it is included (but not traced).
111    
112     If disabled, only shared objects and autoload files will be added.
113 root 1.1
114     =back
115    
116     =cut
117    
118     sub new {
119     my ($class, %kv) = @_;
120    
121     my $self = bless {
122     exedir => "bin",
123     libdir => "lib",
124     bindir => "bin",
125     dlldir => "bin",
126     inc => [grep $_ ne ".", @INC],
127 root 1.3 use_packlist => 1,
128 root 1.1 %kv,
129     set => {},
130     }, $class;
131    
132     my %inc_seen;
133 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
134 root 1.1 $self->{inc} = \@inc;
135    
136     $self->_set_inc;
137    
138     $self
139     }
140    
141 root 1.4 sub _perl_path() {
142     my $secure_perl_path = $Config{perlpath};
143    
144     if ($^O ne 'VMS') {
145     $secure_perl_path .= $Config{_exe}
146     unless $secure_perl_path =~ m/$Config{_exe}$/i;
147     }
148    
149     $secure_perl_path
150     }
151    
152 root 1.8 sub _path2match {
153     my $re = join "|", map "\Q$_", @_;
154    
155     $re = "(?:$re)\\/";
156     $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
157    
158     $case_tolerant
159     ? qr<$re>i
160     : qr<$re>
161     }
162    
163 root 1.1 sub _set_inc {
164     my ($self) = @_;
165    
166 root 1.8 my $matchprefix = _path2match @{ $self->{inc }};
167 root 1.2
168 root 1.1 my %lib;
169 root 1.2 my @packlists;
170 root 1.1
171     # find all files in all libdirs, earlier ones overwrite later ones
172 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
173    
174     while (@scan) {
175     my ($root, $dir) = @{ pop @scan };
176    
177     my $pfx = length $dir ? "$dir/" : "";
178    
179     for (do {
180     opendir my $fh, "$root/$dir"
181     or croak "$root/$dir: $!";
182     grep !/^\.\.?$/, readdir $fh
183     }) {
184     if (-d "$root/$dir/$_/.") {
185     $lib{"$pfx$_/"} = "$root/$pfx$_";
186     push @scan, [$root, "$pfx$_"];
187     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
188     push @packlists, [$root, $pfx];
189     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
190     # skip empty .bs files
191     # } elsif (/\.(?:pod|h|html)$/) {
192     # # not interested in those
193     } else {
194     #push @files, $_;
195     $lib{"$pfx$_"} = "$root/$pfx$_";
196 root 1.1 }
197 root 1.2 }
198    
199     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
200     }
201    
202     my %packlist;
203 root 1.1
204 root 1.2 # need to go forward here
205     for (@packlists) {
206     my ($root, $auto) = @$_;
207    
208     my @packlist;
209    
210     open my $fh, "<:perlio", "$root/$auto/.packlist"
211     or die "$root/$auto/.packlist: $!";
212    
213 root 1.8 $root = _path2match $root;
214 root 1.2
215     while (<$fh>) {
216     chomp;
217     s/ .*$//; # newer-style .packlists might contain key=value pairs
218    
219     s/$root// or next;
220 root 1.8 $_ = canonpath $_;
221 root 1.2 exists $lib{$_} or next;
222 root 1.1
223 root 1.2 push @packlist, $_;
224     $packlist{$_} = \@packlist;
225     }
226 root 1.1 }
227    
228 root 1.8 $self->{lib} = \%lib;
229     $self->{packlist} = \%packlist;
230 root 1.2 $self->{matchprefix} = $matchprefix;
231 root 1.1 }
232    
233 root 1.5 =back
234    
235     =head2 TRACE/PACKLIST BASED ADDING
236    
237     The following methods add various things to the set of files.
238    
239     Each time a perl file is added, it is scanned by tracing either loading,
240     execution or compiling it, and seeing which other perl modules and
241     libraries have been loaded.
242    
243     For each library file found this way, additional dependencies are added:
244     if packlists are enabled, then all files of the distribution that contains
245     the file will be added. If packlists are disabled, then only shared
246     objects and autoload files for modules will be added.
247    
248     Only files from perl library directories will be added automatically. Any
249     other files (such as manpages or scripts installed in the F<bin>
250     directory) are skipped.
251    
252     If there is an error, such as a module not being found, then this module
253     croaks (as opposed to silently skipping). If you want to add something of
254     which you are not sure it exists, then you can wrap the call into C<eval
255     {}>. In some cases, you can avoid this by executing the code you want
256     to work later using C<add_eval> - see C<add_core_support> for an actual
257     example of this technique.
258    
259     Note that packlists are meant to add files not covered by other
260     mechanisms, such as resource files and other data files loaded directly by
261     a module - they are not meant to add dependencies that are missed because
262     they only happen at runtime.
263    
264     For example, with packlists, when using L<AnyEvent>, then all event loop
265     backends are automatically added as well, but I<not> any event loops
266     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
267     packlists, only the backend that is being used is added (i.e. normally
268     none, as loading AnyEvent does not instantly load any backend).
269    
270     To catch the extra event loop dependencies, you can either initialise
271     AnyEvent so it picks a suitable backend:
272    
273     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
274    
275     Or you can directly load the backend modules you plan to use:
276    
277     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
278    
279     An example of a program (or module) that has extra resource files is
280     L<Deliantra::Client> - the normal tracing (without packlist usage) will
281     correctly add all submodules, but miss the fonts and textures. By using
282     the packlist, those files are added correctly.
283    
284     =over 4
285    
286     =cut
287    
288 root 1.1 sub _add {
289     my ($self, $add) = @_;
290    
291     my $lib = $self->{lib};
292     my $path;
293    
294     for (@$add) {
295     $path = "$self->{libdir}/$_";
296    
297     $self->{set}{$path} ||= do {
298     my @info;
299    
300     $info[I_SRC] = $lib->{$_}
301     or croak "$_: unable to locate file in perl library";
302    
303 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
304 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
305     for @{ $self->{packlist}{$_} };
306 root 1.3
307     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
308     # s/\.pm$//;
309     # s%/%::%g;
310     # my $pkg = "libextractor" . ++$self->{count};
311     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
312     # unless $self->{_add_do}{$_}++;
313     # }
314     #
315     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
316     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
317    
318 root 1.2 } elsif (/^(.*)\.pm$/) {
319 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
320     $auto =~ m%/([^/]+)/$% or die;
321     my $base = $1;
322    
323     if (exists $lib->{$auto}) {
324     # auto dir exists, scan it for cool stuff
325    
326     # 1. shared object, others are of no interest to us
327     my $so = "$auto$base.$Config{dlext}";
328     if (my $src = $lib->{$so}) {
329     $so = "$self->{libdir}/$so";
330     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
331     }
332    
333     # 2. autoloader/autosplit
334     my $ix = "${auto}autosplit.ix";
335     if (my $src = $lib->{$ix}) {
336     $ix = "$self->{libdir}/$ix";
337     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
338    
339     open my $fh, "<:perlio", $src
340     or croak "$src: $!";
341    
342     my $package;
343    
344     while (<$fh>) {
345     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
346     my $al = "auto/$package/$1.al";
347     my $src = $lib->{$al}
348     or croak "$al: autoload file not found, but should be there.";
349    
350     $al = "$self->{libdir}/$al";
351     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
352    
353     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
354     ($package = $1) =~ s/::/\//g;
355     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
356     # nop
357     } else {
358     warn "WARNING: $src: unparsable line, please report: $_";
359     }
360     }
361     }
362    
363     skip:
364     }
365     }
366    
367     \@info
368     };
369     }
370     }
371    
372     sub _trace {
373     my ($self, $file, $eval) = @_;
374    
375 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
376 root 1.1 }
377    
378     sub _trace_flush {
379     my ($self) = @_;
380    
381 root 1.3 # ->_add might add additional files to trace
382     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
383     my $tmpdir = newdir File::Temp;
384     my $dir = $tmpdir->dirname;
385    
386     open my $fh, ">:perlio", "$dir/eval"
387     or croak "$dir/eval: $!";
388     syswrite $fh,
389     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
390     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
391     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
392     . "CHECK {\n"
393     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
394     . 'print join "\x00", values %INC;'
395     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
396     . "}\n"
397     . (delete $self->{trace_check});
398     close $fh;
399    
400 root 1.4 system _perl_path, "-c", "$dir/eval"
401 root 1.3 and croak "trace failure, check trace process output - caught";
402 root 1.1
403 root 1.3 my @inc = split /\x00/, do {
404     open my $fh, "<:perlio", "$dir/out"
405     or croak "$dir/out: $!";
406     local $/;
407     scalar readline $fh
408     };
409 root 1.1
410 root 1.3 my $matchprefix = $self->{matchprefix};
411 root 1.1
412 root 1.3 # remove the library directory prefix, hope for the best
413     s/$matchprefix//
414     or croak "$_: file outside any library directory"
415     for @inc;
416 root 1.1
417 root 1.3 $self->_add (\@inc);
418     }
419 root 1.1 }
420    
421     =item $extractor->add_mod ($module[, $module...])
422    
423     Adds the given module(s) to the file set - the module name must be specified
424     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
425    
426     The program will be loaded with the default import list, any dependent
427     files, such as the shared object implementing xs functions, or autoload
428     files, will also be added.
429    
430 root 1.5 If you want to use a different import list (for those rare modules wghere
431     import lists trigger different backend modules to be loaded for example),
432     you can use C<add_eval> instead:
433    
434     $extractor->add_eval ("use Module qw(a b c)");
435    
436 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
437     from the distribution they are part of.
438    
439     $extractor->add_mod ("Coro", "AnyEvent::AIO");
440    
441     =cut
442    
443     sub add_mod {
444     my $self = shift;
445    
446 root 1.3 for (@_) {
447     my $pkg = "libextractor" . ++$self->{count};
448     $self->_trace ("use $_", "{ package $pkg; use $_ }")
449     unless $self->{add_mod}{$_}++;
450     }
451 root 1.1 }
452    
453 root 1.5 =item $extractor->add_script ($name[, $name...])
454 root 1.1
455     Adds the given (perl) program(s) to the file set, that is, a program
456     installed by some perl module, written in perl (an example would be the
457     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
458     distribution).
459    
460 root 1.5 Example: add the deliantra client program installed by the
461     L<Deliantra::Client> module.
462 root 1.1
463 root 1.5 $extractor->add_script ("deliantra");
464 root 1.1
465     =cut
466    
467 root 1.5 sub add_script {
468 root 1.1 my $self = shift;
469    
470     exe:
471     for my $exe (@_) {
472     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
473     if (open my $fh, "<:perlio", "$dir/$exe") {
474    
475     my $file = do { local $/; readline $fh };
476    
477     $self->_trace_flush if exists $self->{trace_check};
478     $self->{trace_check} = $file;
479    
480 root 1.7 $self->{set}{"$self->{bindir}/$exe"} = ["$dir/$exe"];
481 root 1.1 next exe;
482     }
483     }
484    
485 root 1.5 croak "add_script ($exe): executable not found";
486 root 1.1 }
487     }
488    
489     =item $extractor->add_eval ($string)
490    
491 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
492     by it. For example, this would add L<AnyEvent> and the default backend
493     implementation module and event loop module:
494 root 1.1
495     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
496    
497 root 1.5 Each code snippet will be executed in its own package and under C<use
498     strict>.
499    
500 root 1.1 =cut
501    
502     sub add_eval {
503     my ($self, $eval) = @_;
504    
505 root 1.5 my $pkg = "libextractor" . ++$self->{count};
506 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
507 root 1.3 $self->_trace ($eval,
508     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
509 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
510 root 1.3 );
511 root 1.1 }
512    
513 root 1.5 =back
514    
515     =head2 OTHER METHODS FOR ADDING FILES
516    
517     The following methods add commonly used files that are either not covered
518     by other methods or add commonly-used dependencies.
519    
520     =over 4
521    
522 root 1.1 =item $extractor->add_perl
523    
524     Adds the perl binary itself to the file set, including the libperl dll, if
525     needed.
526    
527 root 1.5 For example, on UNIX systems, this usually adds a F<bin/perl> and possibly
528     some F<lib/libperl.so.XXX>.
529    
530 root 1.4 =cut
531    
532     sub add_perl {
533     my ($self) = @_;
534    
535 root 1.7 $self->{set}{"$self->{exedir}/perl$Config{_exe}"} = [_perl_path];
536 root 1.4
537     # on debian, we have the special case of a perl binary linked against
538     # a static libperl.a (which is not available), but the Config says to use
539     # a shared library, which is in the wrong directory, too (which breaks
540     # every other perl installation on the system - they are so stupid).
541    
542     # that means we can't find the libperl.so, because dbeian actively breaks
543     # their perl install, and we don't need it. we work around this by silently
544     # not including the libperl if we cannot find it.
545    
546     if ($Config{useshrplib} eq "true") {
547 root 1.8 my ($libperl, $libpath);
548    
549     if ($^O eq "cygwin") {
550     $libperl = $Config{libperl};
551     $libpath = "$Config{binexp}/$libperl";
552     } elsif ($^O eq "MSWin32") {
553     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
554     $libpath = "$Config{binexp}/$libperl";
555     } else {
556     $libperl = $Config{libperl};
557     $libpath = $self->{lib}{"CORE/$libperl"};
558 root 1.4 }
559 root 1.8
560     $self->{set}{"$self->{dlldir}/$libperl"} = $libpath
561     if length $libpath && -e $libpath;
562 root 1.4 }
563     }
564 root 1.3
565     =item $extractor->add_core_support
566    
567     Try to add modules and files needed to support commonly-used builtin
568     language features. For example to open a scalar for I/O you need the
569     L<PerlIO::scalar> module:
570    
571     open $fh, "<", \$scalar
572    
573     A number of regex and string features (e.g. C<ucfirst>) need some unicore
574     files, e.g.:
575    
576     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
577    
578     This call adds these files (simply by executing code similar to the above
579     code fragments).
580    
581     Notable things that are missing are other PerlIO layers, such as
582     L<PerlIO::encoding>, and named character and character class matches.
583    
584     =cut
585    
586     sub add_core_support {
587     my ($self) = @_;
588    
589     $self->add_eval ('my $v; open my $fh, "<", \$v');
590     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
591 root 1.10 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
592     $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
593 root 1.3 }
594 root 1.1
595 root 1.3 =item $extractor->add_unicore
596    
597 root 1.5 Adds (hopefully) all files form the unicore database that will ever be
598     needed.
599    
600     If you are not sure which unicode character classes and similar unicore
601     databases you need, and you do not care about an extra one thousand(!)
602     files comprising 4MB of data, then you can just call this method, which
603     adds basically all files from perl's unicode database.
604    
605     =cut
606    
607     sub add_unicore {
608     my ($self) = @_;
609    
610     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
611     }
612    
613     =back
614    
615     =head2 GLOB-BASED ADDING AND FILTERING
616 root 1.1
617 root 1.5 These methods add or manipulate files by using glob-based patterns.
618    
619     These glob patterns work similarly to glob patterns in the shell:
620    
621     =over 4
622    
623     =item /
624    
625     A F</> at the start of the pattern interprets the pattern as a file
626     path inside the file set, almost the same as in the shell. For example,
627     F</bin/perl*> would match all files whose names starting with F<perl>
628     inside the F<bin> directory in the set.
629    
630     If the F</> is missing, then the pattern is interpreted as a module name
631     (a F<.pm> file). For example, F<Coro> matches the file F<libdir/Coro.pm>
632     (where F<libdir> is the perl library directory), while F<Coro::*> would
633     match F<libdir/Coro/*.pm>.
634    
635     =item *
636    
637     A single star matches anything inside a single directory component. For
638     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
639     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
640 root 1.1
641 root 1.5 Another way to look at it is that a single star matches anything but a
642     slash (F</>).
643 root 1.1
644 root 1.5 =item **
645 root 1.1
646 root 1.5 A double star matches any number of characters in the path, including F</>.
647 root 1.1
648 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
649     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
650 root 1.1
651 root 1.5 =back
652 root 1.1
653 root 1.2 =cut
654    
655     sub _extglob2re {
656 root 1.5 for (quotemeta $_[1]) {
657 root 1.2 s/\\\*\\\*/.*/g;
658     s/\\\*/[^\/]*/g;
659     s/\\\?/[^\/]/g;
660    
661     unless (s%^\\/%%) {
662     s%\\:\\:%/%g;
663 root 1.5 $_ = (quotemeta $_[0]{libdir}) . "/$_\\.pm";
664 root 1.2 }
665    
666     $_ .= '$';
667     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
668    
669     return qr<^$_>s
670     }
671     }
672    
673 root 1.5 =over 4
674    
675     =item $extractor->add_glob ($modglob[, $modglob...])
676    
677     Adds all files from the perl library that match the given glob pattern.
678    
679     For example, you could implement C<add_unicore> yourself like this:
680    
681     $extractor->add_glob ("/unicore/**.pl");
682    
683     =cut
684    
685     sub add_glob {
686     my $self = shift;
687    
688     for (@_) {
689     my $pat = $self->_extglob2re ($_);
690     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
691     }
692     }
693    
694     =item $extractor->filter ($pattern[, $pattern...])
695    
696     Applies a series of include/exclude filters. Each filter must start with
697     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
698     pattern. The rest of the pattern is a normal glob pattern.
699    
700     An exclude pattern (C<->) instantly removes all matching files from
701     the set. An include pattern (C<+>) protects matching files from later
702     removals.
703    
704     That is, if you have an include pattern then all files that were matched
705     by it will be included in the set, regardless of any further exclude
706     patterns matching the same files.
707    
708     Likewise, any file excluded by a pattern will not be included in the set,
709     even if matched by later include patterns.
710    
711     Any files not matched by any expression will simply stay in the set.
712    
713     For example, to remove most of the useless autoload functions by the POSIX
714     module (they either do the same thing as a builtin or always raise an
715     error), you would use this (assuming a default C<libdir>):
716    
717     $extractor->filter ("-/lib/auto/POSIX/*.al");
718    
719     This does not remove all autoload files, only the ones not defined by a
720     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
721    
722     =cut
723    
724 root 1.2 sub filter {
725     my ($self, @patterns) = @_;
726    
727 root 1.3 $self->_trace_flush;
728    
729     my $set = $self->{set};
730     my %include;
731 root 1.2
732     for my $pat (@patterns) {
733     $pat =~ s/^([+\-])//
734     or croak "$_: not a valid filter pattern (missing + or - prefix)";
735     my $inc = $1 eq "+";
736     $pat = $self->_extglob2re ($pat);
737 root 1.3
738     my @match = grep /$pat/, keys %$set;
739    
740     if ($inc) {
741     @include{@match} = delete @$set{@match};
742     } else {
743     delete @$set{@{ $_->[I_DEP] }} # remove dependents
744     for delete @$set{@match};
745     }
746 root 1.2 }
747 root 1.3
748     my @include = keys %include;
749     @$set{@include} = delete @include{@include};
750 root 1.2 }
751 root 1.1
752 root 1.3 =item $extractor->runtime_only
753    
754     This removes all files that are not needed at runtime, such as static
755     archives, header and other files needed only for compilation of modules,
756     and pod and html files (which are unlikely to be needed at runtime).
757 root 1.1
758 root 1.8 This is quite useful when you want to have only files actually needed to
759 root 1.3 execute a program.
760    
761     =cut
762    
763     sub runtime_only {
764     my ($self) = @_;
765    
766     $self->_trace_flush;
767    
768     my $set = $self->{set};
769    
770 root 1.8 # delete all static libraries, also windows stuff
771     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
772 root 1.3
773     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
774     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
775    
776     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
777 root 1.8 delete @$set{ grep m%^\Q$self->{libdir}\E/.*\.(?:pod|h|html)$%s, keys %$set };
778 root 1.3 }
779 root 1.1
780     =back
781    
782 root 1.5 =head2 RESULT SET
783    
784     =over 4
785 root 1.1
786 root 1.5 =item $set = $extractor->set
787 root 1.1
788 root 1.5 Returns a hash reference that represents the result set. The hash is the
789     actual internal storage hash and can only be modified as described below.
790 root 1.1
791 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
792     e.g.:
793 root 1.1
794 root 1.5 bin/perl
795     lib/unicore/lib/Blk/Superscr.pl
796     lib/AnyEvent/Impl/EV.pm
797    
798     The value is an array reference with mostly unspecified contents, except
799     the first element, which is the file system path where the actual file can
800     be found.
801    
802     This code snippet lists all files inside the set:
803    
804     print "$_\n"
805     for sort keys %{ $extractor->set });
806    
807     This code fragment prints C<< filesystem_path => set_path >> pairs for all
808     files in the set:
809    
810     my $set = $extractor->set;
811     while (my ($set,$fspath) = each %$set) {
812     print "$fspath => $set\n";
813     }
814 root 1.1
815 root 1.5 You can implement your own filtering by asking for the result set with
816     C<< $extractor->set >>, and then deleting keys from the referenced hash
817     - since you can ask for the result set at any time you can add things,
818     filter them out this way, and add additional things.
819    
820     =back
821    
822     =cut
823    
824     sub set {
825 root 1.6 $_[0]->_trace_flush;
826 root 1.5 $_[0]{set}
827     }
828    
829     =head1 EXAMPLE
830    
831     To package he deliantra client (L<Deliantra::Client>), finding all
832     (perl) files needed to run it is a first step. This can be done by using
833     something like the following code snippet:
834    
835     my $ex = new Perl::LibExtractor
836     exedir => ".", dlldir => ".",
837     libdir => "pm", bindir => "pm/bin";
838    
839     $ex->add_perl;
840     $ex->add_core_support;
841     $ex->add_script ("deliantra");
842     $ex->add_mod ("AnyEvent::Impl::EV");
843     $ex->add_mod ("AnyEvent::Impl::Perl");
844     $ex->add_mod ("Urlader");
845     $ex->filter ("-/*/auto/POSIX/**.al");
846     $ex->runtime_only;
847    
848     Let's first find out about the choice of paths for the subset. The
849     Deliantra client binary packages use L<Urlader> nowadays, and there it is
850     convenient to have F<perl> and any shared libraries directly in the root
851     of the distribution.
852    
853     The perl library files are put into a directory named F<pm>, simply
854     because it's shorter than F<lib>, and in the future, some files might go
855     into F<lib>.
856    
857     And finally, the F<deliantra> script itself is put into the perl library
858     directory, because it is not run directly - the installed client uses the
859     system fonts and other resources, while the binary package is supposed
860     to use the files packaged with it. To achieve this, a wrapper script is
861     created, called F<run>; which displays a splash screen and configures the
862     environment. A simplified version of it could look like this:
863    
864     @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
865     $ENV{PANGO_RC_FILE} = "pango.rc";
866     require "bin/deliantra";
867     exit 0;
868    
869     First it sets the perl library directory to F<pm> and F<.> (the latter
870     to work around some AutoLoader bugs), so perl uses only the perl library
871     files that came with the binary package.
872    
873     Then it sets some environment variable to override the system default
874     (which might be incompatible).
875    
876     Then it runs the client itself, using C<require>. Since C<require> only
877     looks in the perl library directory this is the reaosn why the scripts
878     were put there (of course, since F<.> is also included it doesn't matter,
879     but I refuse to yield to bugs).
880    
881     Finally it exits with a clean status to signal "ok" to Urlader.
882    
883     Back to the original C<Perl::LibExtractor> script: after initialising a
884     new set, the script simply adds the F<perl> interpreter and core support
885     files (just in case, not all are needed, but some are, and I am too lazy
886     to find out which ones exactly).
887    
888     Then it adds the deliantra executable itself, which in turn adds most of
889     the required modules. After that, the AnyEvent implementation modules are
890     added because these dependencies are not picked up automatically.
891    
892     The L<Urlader> module is added because the client itself does not depend
893     on it at all, but the wrapper does.
894    
895     At this point, all required files are present, and it's time to slim
896     down: most of the ueseless POSIX autoloaded functions are removed,
897     not because they are so big, but because creating files is a costly
898     operation in itself, so even small fiels have considerable overhead when
899     unpacking. Then files not required for running the client are removed.
900    
901     And that concludes it, the set is now ready.
902 root 1.1
903     =head1 SEE ALSO
904    
905     The utility program that comes with this module: L<perl-libextract>.
906    
907     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
908    
909 root 1.5 =head1 LICENSE
910    
911     This software package is licensed under the GPL version 3 or any later
912     version, see COPYING for details.
913    
914     This license does not, of course, apply to any output generated by this
915     software.
916    
917 root 1.1 =head1 AUTHOR
918    
919     Marc Lehmann <schmorp@schmorp.de>
920     http://home.schmorp.de/
921    
922     =cut
923    
924     1;
925