ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.15
Committed: Thu Jan 26 01:24:25 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.14: +14 -15 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72 root 1.12 Binary executables (such as the perl interpreter) are stored inside
73     F<bin/>, perl scripts are stored under F<script/>, perl library files are
74     stored under F<lib/> and shared libraries are stored under F<dll/>.
75    
76 root 1.1 The following key-value pairs exist, with default values as specified.
77    
78     =over 4
79    
80     =item inc => \@INC without "."
81    
82     An arrayref with paths to perl library directories. The default is
83     C<\@INC>, with F<.> removed.
84    
85     To prepend custom dirs just do this:
86    
87     inc => ["mydir", @INC],
88    
89 root 1.3 =item use_packlist => 1
90 root 1.1
91 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 root 1.5 then each time a file is traced, the complete distribution that contains
93     it is included (but not traced).
94    
95     If disabled, only shared objects and autoload files will be added.
96 root 1.1
97 root 1.13 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98     this option will fail. Other perls should be fine.
99    
100 root 1.1 =back
101    
102     =cut
103    
104     sub new {
105     my ($class, %kv) = @_;
106    
107     my $self = bless {
108 root 1.12 inc => [grep $_ ne ".", @INC],
109 root 1.3 use_packlist => 1,
110 root 1.1 %kv,
111     set => {},
112     }, $class;
113    
114     my %inc_seen;
115 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
116 root 1.1 $self->{inc} = \@inc;
117    
118 root 1.14 # maybe not inc, but these?
119     # sitearchexp
120     # sitelib
121     # vendorarchexp
122     # vendorlibexp
123     # archlibexp
124     # privlibexp
125    
126 root 1.1 $self->_set_inc;
127    
128     $self
129     }
130    
131 root 1.4 sub _perl_path() {
132     my $secure_perl_path = $Config{perlpath};
133    
134     if ($^O ne 'VMS') {
135     $secure_perl_path .= $Config{_exe}
136     unless $secure_perl_path =~ m/$Config{_exe}$/i;
137     }
138    
139     $secure_perl_path
140     }
141    
142 root 1.8 sub _path2match {
143     my $re = join "|", map "\Q$_", @_;
144    
145     $re = "(?:$re)\\/";
146     $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
147    
148     $case_tolerant
149     ? qr<$re>i
150     : qr<$re>
151     }
152    
153 root 1.11 sub _read_packlist {
154 root 1.15 my ($self, $path) = @_;
155 root 1.11
156 root 1.15 my $pfxmatch = $self->{pfxmatch};
157     my $lib = $self->{lib};
158 root 1.11
159     my @packlist;
160    
161 root 1.15 open my $fh, "<:perlio", $path
162     or die "$path: $!";
163 root 1.11
164     while (<$fh>) {
165     chomp;
166     s/ .*$//; # newer-style .packlists might contain key=value pairs
167    
168 root 1.15 s/$pfxmatch// and exists $lib->{$_}
169 root 1.11 or next;
170    
171     push @packlist, canonpath $_;
172     }
173    
174     \@packlist
175     }
176    
177 root 1.1 sub _set_inc {
178     my ($self) = @_;
179    
180 root 1.15 my $pfxmatch = _path2match @{ $self->{inc }};
181 root 1.2
182 root 1.1 my %lib;
183 root 1.2 my @packlists;
184 root 1.1
185     # find all files in all libdirs, earlier ones overwrite later ones
186 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
187    
188     while (@scan) {
189     my ($root, $dir) = @{ pop @scan };
190    
191     my $pfx = length $dir ? "$dir/" : "";
192    
193     for (do {
194     opendir my $fh, "$root/$dir"
195     or croak "$root/$dir: $!";
196     grep !/^\.\.?$/, readdir $fh
197     }) {
198     if (-d "$root/$dir/$_/.") {
199     $lib{"$pfx$_/"} = "$root/$pfx$_";
200     push @scan, [$root, "$pfx$_"];
201     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
202 root 1.15 push @packlists, "$root/$pfx.packlist";
203 root 1.2 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
204     # skip empty .bs files
205     # } elsif (/\.(?:pod|h|html)$/) {
206     # # not interested in those
207     } else {
208     #push @files, $_;
209     $lib{"$pfx$_"} = "$root/$pfx$_";
210 root 1.1 }
211 root 1.2 }
212    
213     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
214     }
215    
216 root 1.15 $self->{lib} = \%lib;
217     $self->{pfxmatch} = $pfxmatch;
218 root 1.14
219 root 1.2 my %packlist;
220 root 1.1
221 root 1.2 # need to go forward here
222     for (@packlists) {
223 root 1.15 my $packlist = $self->_read_packlist ($_);
224 root 1.2
225 root 1.11 $packlist{$_} = $packlist
226     for @$packlist;
227 root 1.1 }
228    
229 root 1.14 $self->{packlist} = \%packlist;
230 root 1.1 }
231    
232 root 1.5 =back
233    
234     =head2 TRACE/PACKLIST BASED ADDING
235    
236     The following methods add various things to the set of files.
237    
238     Each time a perl file is added, it is scanned by tracing either loading,
239     execution or compiling it, and seeing which other perl modules and
240     libraries have been loaded.
241    
242     For each library file found this way, additional dependencies are added:
243     if packlists are enabled, then all files of the distribution that contains
244     the file will be added. If packlists are disabled, then only shared
245     objects and autoload files for modules will be added.
246    
247     Only files from perl library directories will be added automatically. Any
248     other files (such as manpages or scripts installed in the F<bin>
249     directory) are skipped.
250    
251     If there is an error, such as a module not being found, then this module
252     croaks (as opposed to silently skipping). If you want to add something of
253     which you are not sure it exists, then you can wrap the call into C<eval
254     {}>. In some cases, you can avoid this by executing the code you want
255     to work later using C<add_eval> - see C<add_core_support> for an actual
256     example of this technique.
257    
258     Note that packlists are meant to add files not covered by other
259     mechanisms, such as resource files and other data files loaded directly by
260     a module - they are not meant to add dependencies that are missed because
261     they only happen at runtime.
262    
263     For example, with packlists, when using L<AnyEvent>, then all event loop
264     backends are automatically added as well, but I<not> any event loops
265     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
266     packlists, only the backend that is being used is added (i.e. normally
267     none, as loading AnyEvent does not instantly load any backend).
268    
269     To catch the extra event loop dependencies, you can either initialise
270     AnyEvent so it picks a suitable backend:
271    
272     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
273    
274     Or you can directly load the backend modules you plan to use:
275    
276     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
277    
278     An example of a program (or module) that has extra resource files is
279     L<Deliantra::Client> - the normal tracing (without packlist usage) will
280     correctly add all submodules, but miss the fonts and textures. By using
281     the packlist, those files are added correctly.
282    
283     =over 4
284    
285     =cut
286    
287 root 1.1 sub _add {
288     my ($self, $add) = @_;
289    
290     my $lib = $self->{lib};
291     my $path;
292    
293     for (@$add) {
294 root 1.12 $path = "lib/$_";
295 root 1.1
296     $self->{set}{$path} ||= do {
297     my @info;
298    
299     $info[I_SRC] = $lib->{$_}
300     or croak "$_: unable to locate file in perl library";
301    
302 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
303 root 1.12 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
304 root 1.2 for @{ $self->{packlist}{$_} };
305 root 1.3
306     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
307     # s/\.pm$//;
308     # s%/%::%g;
309     # my $pkg = "libextractor" . ++$self->{count};
310     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
311     # unless $self->{_add_do}{$_}++;
312     # }
313     #
314     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
315     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
316    
317 root 1.2 } elsif (/^(.*)\.pm$/) {
318 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
319     $auto =~ m%/([^/]+)/$% or die;
320     my $base = $1;
321    
322     if (exists $lib->{$auto}) {
323     # auto dir exists, scan it for cool stuff
324    
325     # 1. shared object, others are of no interest to us
326     my $so = "$auto$base.$Config{dlext}";
327     if (my $src = $lib->{$so}) {
328 root 1.12 $so = "lib/$so";
329 root 1.1 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
330     }
331    
332     # 2. autoloader/autosplit
333     my $ix = "${auto}autosplit.ix";
334     if (my $src = $lib->{$ix}) {
335 root 1.12 $ix = "lib/$ix";
336 root 1.1 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
337    
338     open my $fh, "<:perlio", $src
339     or croak "$src: $!";
340    
341     my $package;
342    
343     while (<$fh>) {
344     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
345     my $al = "auto/$package/$1.al";
346     my $src = $lib->{$al}
347     or croak "$al: autoload file not found, but should be there.";
348    
349 root 1.12 $al = "lib/$al";
350 root 1.1 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
351    
352     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
353     ($package = $1) =~ s/::/\//g;
354     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
355     # nop
356     } else {
357     warn "WARNING: $src: unparsable line, please report: $_";
358     }
359     }
360     }
361    
362     skip:
363     }
364     }
365    
366     \@info
367     };
368     }
369     }
370    
371     sub _trace {
372     my ($self, $file, $eval) = @_;
373    
374 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
375 root 1.1 }
376    
377     sub _trace_flush {
378     my ($self) = @_;
379    
380 root 1.3 # ->_add might add additional files to trace
381     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
382     my $tmpdir = newdir File::Temp;
383     my $dir = $tmpdir->dirname;
384    
385     open my $fh, ">:perlio", "$dir/eval"
386     or croak "$dir/eval: $!";
387     syswrite $fh,
388     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
389     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
390     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
391     . "CHECK {\n"
392     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
393     . 'print join "\x00", values %INC;'
394     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
395     . "}\n"
396     . (delete $self->{trace_check});
397     close $fh;
398    
399 root 1.4 system _perl_path, "-c", "$dir/eval"
400 root 1.3 and croak "trace failure, check trace process output - caught";
401 root 1.1
402 root 1.3 my @inc = split /\x00/, do {
403     open my $fh, "<:perlio", "$dir/out"
404     or croak "$dir/out: $!";
405     local $/;
406     scalar readline $fh
407     };
408 root 1.1
409 root 1.15 my $pfxmatch = $self->{pfxmatch};
410 root 1.1
411 root 1.3 # remove the library directory prefix, hope for the best
412 root 1.15 s/$pfxmatch//
413 root 1.3 or croak "$_: file outside any library directory"
414     for @inc;
415 root 1.1
416 root 1.3 $self->_add (\@inc);
417     }
418 root 1.1 }
419    
420     =item $extractor->add_mod ($module[, $module...])
421    
422     Adds the given module(s) to the file set - the module name must be specified
423     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
424    
425     The program will be loaded with the default import list, any dependent
426     files, such as the shared object implementing xs functions, or autoload
427     files, will also be added.
428    
429 root 1.5 If you want to use a different import list (for those rare modules wghere
430     import lists trigger different backend modules to be loaded for example),
431     you can use C<add_eval> instead:
432    
433     $extractor->add_eval ("use Module qw(a b c)");
434    
435 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
436     from the distribution they are part of.
437    
438     $extractor->add_mod ("Coro", "AnyEvent::AIO");
439    
440     =cut
441    
442     sub add_mod {
443     my $self = shift;
444    
445 root 1.3 for (@_) {
446     my $pkg = "libextractor" . ++$self->{count};
447     $self->_trace ("use $_", "{ package $pkg; use $_ }")
448     unless $self->{add_mod}{$_}++;
449     }
450 root 1.1 }
451    
452 root 1.12 =item $extractor->add_bin ($name[, $name...])
453 root 1.1
454     Adds the given (perl) program(s) to the file set, that is, a program
455     installed by some perl module, written in perl (an example would be the
456     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
457     distribution).
458    
459 root 1.5 Example: add the deliantra client program installed by the
460 root 1.12 L<Deliantra::Client> module and put it under F<bin/deliantra>.
461 root 1.1
462 root 1.12 $extractor->add_bin ("deliantra");
463 root 1.1
464     =cut
465    
466 root 1.12 sub add_bin {
467 root 1.1 my $self = shift;
468    
469     exe:
470     for my $exe (@_) {
471     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
472     if (open my $fh, "<:perlio", "$dir/$exe") {
473 root 1.12 if (-f $fh) {
474     my $file = do { local $/; readline $fh };
475 root 1.1
476 root 1.12 $self->_trace_flush if exists $self->{trace_check};
477     $self->{trace_check} = $file;
478 root 1.1
479 root 1.12 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
480     next exe;
481     }
482 root 1.1 }
483     }
484    
485 root 1.12 croak "add_bin ($exe): executable not found";
486 root 1.1 }
487     }
488    
489     =item $extractor->add_eval ($string)
490    
491 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
492     by it. For example, this would add L<AnyEvent> and the default backend
493     implementation module and event loop module:
494 root 1.1
495     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
496    
497 root 1.5 Each code snippet will be executed in its own package and under C<use
498     strict>.
499    
500 root 1.1 =cut
501    
502     sub add_eval {
503     my ($self, $eval) = @_;
504    
505 root 1.5 my $pkg = "libextractor" . ++$self->{count};
506 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
507 root 1.3 $self->_trace ($eval,
508     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
509 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
510 root 1.3 );
511 root 1.1 }
512    
513 root 1.5 =back
514    
515     =head2 OTHER METHODS FOR ADDING FILES
516    
517     The following methods add commonly used files that are either not covered
518     by other methods or add commonly-used dependencies.
519    
520     =over 4
521    
522 root 1.1 =item $extractor->add_perl
523    
524     Adds the perl binary itself to the file set, including the libperl dll, if
525     needed.
526    
527 root 1.12 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
528     some F<dll/libperl.so.XXX>.
529 root 1.5
530 root 1.4 =cut
531    
532     sub add_perl {
533     my ($self) = @_;
534    
535 root 1.12 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
536 root 1.4
537     # on debian, we have the special case of a perl binary linked against
538     # a static libperl.a (which is not available), but the Config says to use
539     # a shared library, which is in the wrong directory, too (which breaks
540     # every other perl installation on the system - they are so stupid).
541    
542     # that means we can't find the libperl.so, because dbeian actively breaks
543     # their perl install, and we don't need it. we work around this by silently
544     # not including the libperl if we cannot find it.
545    
546     if ($Config{useshrplib} eq "true") {
547 root 1.8 my ($libperl, $libpath);
548    
549     if ($^O eq "cygwin") {
550     $libperl = $Config{libperl};
551     $libpath = "$Config{binexp}/$libperl";
552     } elsif ($^O eq "MSWin32") {
553     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
554     $libpath = "$Config{binexp}/$libperl";
555     } else {
556     $libperl = $Config{libperl};
557     $libpath = $self->{lib}{"CORE/$libperl"};
558 root 1.4 }
559 root 1.8
560 root 1.12 $self->{set}{"dll/$libperl"} = $libpath
561 root 1.8 if length $libpath && -e $libpath;
562 root 1.4 }
563     }
564 root 1.3
565     =item $extractor->add_core_support
566    
567     Try to add modules and files needed to support commonly-used builtin
568     language features. For example to open a scalar for I/O you need the
569     L<PerlIO::scalar> module:
570    
571     open $fh, "<", \$scalar
572    
573     A number of regex and string features (e.g. C<ucfirst>) need some unicore
574     files, e.g.:
575    
576     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
577    
578     This call adds these files (simply by executing code similar to the above
579     code fragments).
580    
581     Notable things that are missing are other PerlIO layers, such as
582     L<PerlIO::encoding>, and named character and character class matches.
583    
584     =cut
585    
586     sub add_core_support {
587     my ($self) = @_;
588    
589     $self->add_eval ('my $v; open my $fh, "<", \$v');
590     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
591 root 1.10 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
592     $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
593 root 1.3 }
594 root 1.1
595 root 1.3 =item $extractor->add_unicore
596    
597 root 1.11 Adds (hopefully) all files from the unicore database that will ever be
598 root 1.5 needed.
599    
600     If you are not sure which unicode character classes and similar unicore
601     databases you need, and you do not care about an extra one thousand(!)
602     files comprising 4MB of data, then you can just call this method, which
603     adds basically all files from perl's unicode database.
604    
605 root 1.11 Note that C<add_core_support> also adds some unicore files, but it's not a
606     subset of C<add_unicore> - the former adds all files neccessary to support
607     core builtins (which includes some unicore files and other things), while
608     the latter adds all unicore files (but nothing else).
609    
610     When in doubt, use both.
611    
612 root 1.5 =cut
613    
614     sub add_unicore {
615     my ($self) = @_;
616    
617     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
618     }
619    
620 root 1.11 =item $extractor->add_core
621    
622     This adds all files from the perl core distribution, that is, all library
623     files that come with perl.
624    
625     This is a superset of C<add_core_support> and C<add_unicore>.
626    
627     This is quite a lot, but on the plus side, you can be sure nothing is
628     missing.
629    
630 root 1.13 This requires a full perl installation - Debian GNU/Linux doesn't package
631     the full perl library, so this function will not work there.
632 root 1.11
633     =cut
634    
635     sub add_core {
636     my ($self) = @_;
637    
638     my $lib = $self->{lib};
639    
640     for (@{
641 root 1.15 $self->_read_packlist (".packlist")
642 root 1.11 }) {
643     $self->{set}{$_} ||= [
644 root 1.12 "lib/"
645 root 1.11 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
646     ];
647     }
648     }
649    
650 root 1.5 =back
651    
652     =head2 GLOB-BASED ADDING AND FILTERING
653 root 1.1
654 root 1.5 These methods add or manipulate files by using glob-based patterns.
655    
656     These glob patterns work similarly to glob patterns in the shell:
657    
658     =over 4
659    
660     =item /
661    
662     A F</> at the start of the pattern interprets the pattern as a file
663     path inside the file set, almost the same as in the shell. For example,
664     F</bin/perl*> would match all files whose names starting with F<perl>
665     inside the F<bin> directory in the set.
666    
667     If the F</> is missing, then the pattern is interpreted as a module name
668 root 1.12 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
669     while F<Coro::*> would match F<lib/Coro/*.pm>.
670 root 1.5
671     =item *
672    
673     A single star matches anything inside a single directory component. For
674     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
675     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
676 root 1.1
677 root 1.5 Another way to look at it is that a single star matches anything but a
678     slash (F</>).
679 root 1.1
680 root 1.5 =item **
681 root 1.1
682 root 1.5 A double star matches any number of characters in the path, including F</>.
683 root 1.1
684 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
685     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
686 root 1.1
687 root 1.5 =back
688 root 1.1
689 root 1.2 =cut
690    
691     sub _extglob2re {
692 root 1.5 for (quotemeta $_[1]) {
693 root 1.2 s/\\\*\\\*/.*/g;
694     s/\\\*/[^\/]*/g;
695     s/\\\?/[^\/]/g;
696    
697     unless (s%^\\/%%) {
698     s%\\:\\:%/%g;
699 root 1.12 $_ = "lib/$_\\.pm";
700 root 1.2 }
701    
702     $_ .= '$';
703     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
704    
705     return qr<^$_>s
706     }
707     }
708    
709 root 1.5 =over 4
710    
711     =item $extractor->add_glob ($modglob[, $modglob...])
712    
713     Adds all files from the perl library that match the given glob pattern.
714    
715     For example, you could implement C<add_unicore> yourself like this:
716    
717     $extractor->add_glob ("/unicore/**.pl");
718    
719     =cut
720    
721     sub add_glob {
722     my $self = shift;
723    
724     for (@_) {
725     my $pat = $self->_extglob2re ($_);
726     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
727     }
728     }
729    
730     =item $extractor->filter ($pattern[, $pattern...])
731    
732     Applies a series of include/exclude filters. Each filter must start with
733     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
734     pattern. The rest of the pattern is a normal glob pattern.
735    
736     An exclude pattern (C<->) instantly removes all matching files from
737     the set. An include pattern (C<+>) protects matching files from later
738     removals.
739    
740     That is, if you have an include pattern then all files that were matched
741     by it will be included in the set, regardless of any further exclude
742     patterns matching the same files.
743    
744     Likewise, any file excluded by a pattern will not be included in the set,
745     even if matched by later include patterns.
746    
747     Any files not matched by any expression will simply stay in the set.
748    
749     For example, to remove most of the useless autoload functions by the POSIX
750     module (they either do the same thing as a builtin or always raise an
751 root 1.12 error), you would use this:
752 root 1.5
753     $extractor->filter ("-/lib/auto/POSIX/*.al");
754    
755     This does not remove all autoload files, only the ones not defined by a
756     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
757    
758     =cut
759    
760 root 1.2 sub filter {
761     my ($self, @patterns) = @_;
762    
763 root 1.3 $self->_trace_flush;
764    
765     my $set = $self->{set};
766     my %include;
767 root 1.2
768     for my $pat (@patterns) {
769     $pat =~ s/^([+\-])//
770     or croak "$_: not a valid filter pattern (missing + or - prefix)";
771     my $inc = $1 eq "+";
772     $pat = $self->_extglob2re ($pat);
773 root 1.3
774     my @match = grep /$pat/, keys %$set;
775    
776     if ($inc) {
777     @include{@match} = delete @$set{@match};
778     } else {
779     delete @$set{@{ $_->[I_DEP] }} # remove dependents
780     for delete @$set{@match};
781     }
782 root 1.2 }
783 root 1.3
784     my @include = keys %include;
785     @$set{@include} = delete @include{@include};
786 root 1.2 }
787 root 1.1
788 root 1.3 =item $extractor->runtime_only
789    
790     This removes all files that are not needed at runtime, such as static
791     archives, header and other files needed only for compilation of modules,
792     and pod and html files (which are unlikely to be needed at runtime).
793 root 1.1
794 root 1.8 This is quite useful when you want to have only files actually needed to
795 root 1.3 execute a program.
796    
797     =cut
798    
799     sub runtime_only {
800     my ($self) = @_;
801    
802     $self->_trace_flush;
803    
804     my $set = $self->{set};
805    
806 root 1.8 # delete all static libraries, also windows stuff
807 root 1.12 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
808 root 1.3
809     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
810 root 1.12 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
811 root 1.3
812     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
813 root 1.12 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
814 root 1.11
815     # delete unneeded unicore files
816 root 1.12 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
817 root 1.3 }
818 root 1.1
819     =back
820    
821 root 1.5 =head2 RESULT SET
822    
823     =over 4
824 root 1.1
825 root 1.5 =item $set = $extractor->set
826 root 1.1
827 root 1.5 Returns a hash reference that represents the result set. The hash is the
828     actual internal storage hash and can only be modified as described below.
829 root 1.1
830 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
831     e.g.:
832 root 1.1
833 root 1.5 bin/perl
834     lib/unicore/lib/Blk/Superscr.pl
835     lib/AnyEvent/Impl/EV.pm
836    
837     The value is an array reference with mostly unspecified contents, except
838     the first element, which is the file system path where the actual file can
839     be found.
840    
841     This code snippet lists all files inside the set:
842    
843     print "$_\n"
844     for sort keys %{ $extractor->set });
845    
846     This code fragment prints C<< filesystem_path => set_path >> pairs for all
847     files in the set:
848    
849     my $set = $extractor->set;
850     while (my ($set,$fspath) = each %$set) {
851     print "$fspath => $set\n";
852     }
853 root 1.1
854 root 1.5 You can implement your own filtering by asking for the result set with
855     C<< $extractor->set >>, and then deleting keys from the referenced hash
856     - since you can ask for the result set at any time you can add things,
857     filter them out this way, and add additional things.
858    
859     =back
860    
861     =cut
862    
863     sub set {
864 root 1.6 $_[0]->_trace_flush;
865 root 1.5 $_[0]{set}
866     }
867    
868     =head1 EXAMPLE
869    
870     To package he deliantra client (L<Deliantra::Client>), finding all
871     (perl) files needed to run it is a first step. This can be done by using
872     something like the following code snippet:
873    
874 root 1.12 my $ex = new Perl::LibExtractor;
875 root 1.5
876     $ex->add_perl;
877     $ex->add_core_support;
878 root 1.12 $ex->add_bin ("deliantra");
879 root 1.5 $ex->add_mod ("AnyEvent::Impl::EV");
880     $ex->add_mod ("AnyEvent::Impl::Perl");
881     $ex->add_mod ("Urlader");
882     $ex->filter ("-/*/auto/POSIX/**.al");
883     $ex->runtime_only;
884    
885     First it sets the perl library directory to F<pm> and F<.> (the latter
886     to work around some AutoLoader bugs), so perl uses only the perl library
887     files that came with the binary package.
888    
889     Then it sets some environment variable to override the system default
890     (which might be incompatible).
891    
892     Then it runs the client itself, using C<require>. Since C<require> only
893     looks in the perl library directory this is the reaosn why the scripts
894     were put there (of course, since F<.> is also included it doesn't matter,
895     but I refuse to yield to bugs).
896    
897     Finally it exits with a clean status to signal "ok" to Urlader.
898    
899     Back to the original C<Perl::LibExtractor> script: after initialising a
900     new set, the script simply adds the F<perl> interpreter and core support
901     files (just in case, not all are needed, but some are, and I am too lazy
902     to find out which ones exactly).
903    
904     Then it adds the deliantra executable itself, which in turn adds most of
905     the required modules. After that, the AnyEvent implementation modules are
906     added because these dependencies are not picked up automatically.
907    
908     The L<Urlader> module is added because the client itself does not depend
909     on it at all, but the wrapper does.
910    
911     At this point, all required files are present, and it's time to slim
912     down: most of the ueseless POSIX autoloaded functions are removed,
913     not because they are so big, but because creating files is a costly
914     operation in itself, so even small fiels have considerable overhead when
915     unpacking. Then files not required for running the client are removed.
916    
917     And that concludes it, the set is now ready.
918 root 1.1
919     =head1 SEE ALSO
920    
921     The utility program that comes with this module: L<perl-libextract>.
922    
923     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
924    
925 root 1.5 =head1 LICENSE
926    
927     This software package is licensed under the GPL version 3 or any later
928     version, see COPYING for details.
929    
930     This license does not, of course, apply to any output generated by this
931     software.
932    
933 root 1.1 =head1 AUTHOR
934    
935     Marc Lehmann <schmorp@schmorp.de>
936     http://home.schmorp.de/
937    
938     =cut
939    
940     1;
941