ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.14
Committed: Wed Jan 25 21:27:54 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.13: +12 -3 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72 root 1.12 Binary executables (such as the perl interpreter) are stored inside
73     F<bin/>, perl scripts are stored under F<script/>, perl library files are
74     stored under F<lib/> and shared libraries are stored under F<dll/>.
75    
76 root 1.1 The following key-value pairs exist, with default values as specified.
77    
78     =over 4
79    
80     =item inc => \@INC without "."
81    
82     An arrayref with paths to perl library directories. The default is
83     C<\@INC>, with F<.> removed.
84    
85     To prepend custom dirs just do this:
86    
87     inc => ["mydir", @INC],
88    
89 root 1.3 =item use_packlist => 1
90 root 1.1
91 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 root 1.5 then each time a file is traced, the complete distribution that contains
93     it is included (but not traced).
94    
95     If disabled, only shared objects and autoload files will be added.
96 root 1.1
97 root 1.13 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98     this option will fail. Other perls should be fine.
99    
100 root 1.1 =back
101    
102     =cut
103    
104     sub new {
105     my ($class, %kv) = @_;
106    
107     my $self = bless {
108 root 1.12 inc => [grep $_ ne ".", @INC],
109 root 1.3 use_packlist => 1,
110 root 1.1 %kv,
111     set => {},
112     }, $class;
113    
114     my %inc_seen;
115 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
116 root 1.1 $self->{inc} = \@inc;
117    
118 root 1.14 # maybe not inc, but these?
119     # sitearchexp
120     # sitelib
121     # vendorarchexp
122     # vendorlibexp
123     # archlibexp
124     # privlibexp
125    
126 root 1.1 $self->_set_inc;
127    
128     $self
129     }
130    
131 root 1.4 sub _perl_path() {
132     my $secure_perl_path = $Config{perlpath};
133    
134     if ($^O ne 'VMS') {
135     $secure_perl_path .= $Config{_exe}
136     unless $secure_perl_path =~ m/$Config{_exe}$/i;
137     }
138    
139     $secure_perl_path
140     }
141    
142 root 1.8 sub _path2match {
143     my $re = join "|", map "\Q$_", @_;
144    
145     $re = "(?:$re)\\/";
146     $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
147    
148     $case_tolerant
149     ? qr<$re>i
150     : qr<$re>
151     }
152    
153 root 1.11 sub _read_packlist {
154     my ($self, $root, $path) = @_;
155    
156     my $lib = $self->{lib};
157    
158     my @packlist;
159    
160     open my $fh, "<:perlio", "$root/$path"
161     or die "$root/$path: $!";
162    
163     $root = _path2match $root;
164    
165     while (<$fh>) {
166     chomp;
167     s/ .*$//; # newer-style .packlists might contain key=value pairs
168    
169     s/$root// and exists $lib->{$_}
170     or next;
171    
172     push @packlist, canonpath $_;
173     }
174    
175     \@packlist
176     }
177    
178 root 1.1 sub _set_inc {
179     my ($self) = @_;
180    
181 root 1.8 my $matchprefix = _path2match @{ $self->{inc }};
182 root 1.2
183 root 1.1 my %lib;
184 root 1.2 my @packlists;
185 root 1.1
186     # find all files in all libdirs, earlier ones overwrite later ones
187 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
188    
189     while (@scan) {
190     my ($root, $dir) = @{ pop @scan };
191    
192     my $pfx = length $dir ? "$dir/" : "";
193    
194     for (do {
195     opendir my $fh, "$root/$dir"
196     or croak "$root/$dir: $!";
197     grep !/^\.\.?$/, readdir $fh
198     }) {
199     if (-d "$root/$dir/$_/.") {
200     $lib{"$pfx$_/"} = "$root/$pfx$_";
201     push @scan, [$root, "$pfx$_"];
202     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
203     push @packlists, [$root, $pfx];
204     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
205     # skip empty .bs files
206     # } elsif (/\.(?:pod|h|html)$/) {
207     # # not interested in those
208     } else {
209     #push @files, $_;
210     $lib{"$pfx$_"} = "$root/$pfx$_";
211 root 1.1 }
212 root 1.2 }
213    
214     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
215     }
216    
217 root 1.14 $self->{lib} = \%lib;
218     $self->{matchprefix} = $matchprefix;
219    
220 root 1.2 my %packlist;
221 root 1.1
222 root 1.2 # need to go forward here
223     for (@packlists) {
224 root 1.11 my $packlist = $self->_read_packlist ($_->[0], "$_->[1]/.packlist");
225 root 1.2
226 root 1.11 $packlist{$_} = $packlist
227     for @$packlist;
228 root 1.1 }
229    
230 root 1.14 $self->{packlist} = \%packlist;
231 root 1.1 }
232    
233 root 1.5 =back
234    
235     =head2 TRACE/PACKLIST BASED ADDING
236    
237     The following methods add various things to the set of files.
238    
239     Each time a perl file is added, it is scanned by tracing either loading,
240     execution or compiling it, and seeing which other perl modules and
241     libraries have been loaded.
242    
243     For each library file found this way, additional dependencies are added:
244     if packlists are enabled, then all files of the distribution that contains
245     the file will be added. If packlists are disabled, then only shared
246     objects and autoload files for modules will be added.
247    
248     Only files from perl library directories will be added automatically. Any
249     other files (such as manpages or scripts installed in the F<bin>
250     directory) are skipped.
251    
252     If there is an error, such as a module not being found, then this module
253     croaks (as opposed to silently skipping). If you want to add something of
254     which you are not sure it exists, then you can wrap the call into C<eval
255     {}>. In some cases, you can avoid this by executing the code you want
256     to work later using C<add_eval> - see C<add_core_support> for an actual
257     example of this technique.
258    
259     Note that packlists are meant to add files not covered by other
260     mechanisms, such as resource files and other data files loaded directly by
261     a module - they are not meant to add dependencies that are missed because
262     they only happen at runtime.
263    
264     For example, with packlists, when using L<AnyEvent>, then all event loop
265     backends are automatically added as well, but I<not> any event loops
266     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
267     packlists, only the backend that is being used is added (i.e. normally
268     none, as loading AnyEvent does not instantly load any backend).
269    
270     To catch the extra event loop dependencies, you can either initialise
271     AnyEvent so it picks a suitable backend:
272    
273     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
274    
275     Or you can directly load the backend modules you plan to use:
276    
277     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
278    
279     An example of a program (or module) that has extra resource files is
280     L<Deliantra::Client> - the normal tracing (without packlist usage) will
281     correctly add all submodules, but miss the fonts and textures. By using
282     the packlist, those files are added correctly.
283    
284     =over 4
285    
286     =cut
287    
288 root 1.1 sub _add {
289     my ($self, $add) = @_;
290    
291     my $lib = $self->{lib};
292     my $path;
293    
294     for (@$add) {
295 root 1.12 $path = "lib/$_";
296 root 1.1
297     $self->{set}{$path} ||= do {
298     my @info;
299    
300     $info[I_SRC] = $lib->{$_}
301     or croak "$_: unable to locate file in perl library";
302    
303 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
304 root 1.12 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
305 root 1.2 for @{ $self->{packlist}{$_} };
306 root 1.3
307     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
308     # s/\.pm$//;
309     # s%/%::%g;
310     # my $pkg = "libextractor" . ++$self->{count};
311     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
312     # unless $self->{_add_do}{$_}++;
313     # }
314     #
315     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
316     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
317    
318 root 1.2 } elsif (/^(.*)\.pm$/) {
319 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
320     $auto =~ m%/([^/]+)/$% or die;
321     my $base = $1;
322    
323     if (exists $lib->{$auto}) {
324     # auto dir exists, scan it for cool stuff
325    
326     # 1. shared object, others are of no interest to us
327     my $so = "$auto$base.$Config{dlext}";
328     if (my $src = $lib->{$so}) {
329 root 1.12 $so = "lib/$so";
330 root 1.1 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
331     }
332    
333     # 2. autoloader/autosplit
334     my $ix = "${auto}autosplit.ix";
335     if (my $src = $lib->{$ix}) {
336 root 1.12 $ix = "lib/$ix";
337 root 1.1 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
338    
339     open my $fh, "<:perlio", $src
340     or croak "$src: $!";
341    
342     my $package;
343    
344     while (<$fh>) {
345     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
346     my $al = "auto/$package/$1.al";
347     my $src = $lib->{$al}
348     or croak "$al: autoload file not found, but should be there.";
349    
350 root 1.12 $al = "lib/$al";
351 root 1.1 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
352    
353     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
354     ($package = $1) =~ s/::/\//g;
355     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
356     # nop
357     } else {
358     warn "WARNING: $src: unparsable line, please report: $_";
359     }
360     }
361     }
362    
363     skip:
364     }
365     }
366    
367     \@info
368     };
369     }
370     }
371    
372     sub _trace {
373     my ($self, $file, $eval) = @_;
374    
375 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
376 root 1.1 }
377    
378     sub _trace_flush {
379     my ($self) = @_;
380    
381 root 1.3 # ->_add might add additional files to trace
382     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
383     my $tmpdir = newdir File::Temp;
384     my $dir = $tmpdir->dirname;
385    
386     open my $fh, ">:perlio", "$dir/eval"
387     or croak "$dir/eval: $!";
388     syswrite $fh,
389     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
390     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
391     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
392     . "CHECK {\n"
393     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
394     . 'print join "\x00", values %INC;'
395     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
396     . "}\n"
397     . (delete $self->{trace_check});
398     close $fh;
399    
400 root 1.4 system _perl_path, "-c", "$dir/eval"
401 root 1.3 and croak "trace failure, check trace process output - caught";
402 root 1.1
403 root 1.3 my @inc = split /\x00/, do {
404     open my $fh, "<:perlio", "$dir/out"
405     or croak "$dir/out: $!";
406     local $/;
407     scalar readline $fh
408     };
409 root 1.1
410 root 1.3 my $matchprefix = $self->{matchprefix};
411 root 1.1
412 root 1.3 # remove the library directory prefix, hope for the best
413     s/$matchprefix//
414     or croak "$_: file outside any library directory"
415     for @inc;
416 root 1.1
417 root 1.3 $self->_add (\@inc);
418     }
419 root 1.1 }
420    
421     =item $extractor->add_mod ($module[, $module...])
422    
423     Adds the given module(s) to the file set - the module name must be specified
424     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
425    
426     The program will be loaded with the default import list, any dependent
427     files, such as the shared object implementing xs functions, or autoload
428     files, will also be added.
429    
430 root 1.5 If you want to use a different import list (for those rare modules wghere
431     import lists trigger different backend modules to be loaded for example),
432     you can use C<add_eval> instead:
433    
434     $extractor->add_eval ("use Module qw(a b c)");
435    
436 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
437     from the distribution they are part of.
438    
439     $extractor->add_mod ("Coro", "AnyEvent::AIO");
440    
441     =cut
442    
443     sub add_mod {
444     my $self = shift;
445    
446 root 1.3 for (@_) {
447     my $pkg = "libextractor" . ++$self->{count};
448     $self->_trace ("use $_", "{ package $pkg; use $_ }")
449     unless $self->{add_mod}{$_}++;
450     }
451 root 1.1 }
452    
453 root 1.12 =item $extractor->add_bin ($name[, $name...])
454 root 1.1
455     Adds the given (perl) program(s) to the file set, that is, a program
456     installed by some perl module, written in perl (an example would be the
457     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
458     distribution).
459    
460 root 1.5 Example: add the deliantra client program installed by the
461 root 1.12 L<Deliantra::Client> module and put it under F<bin/deliantra>.
462 root 1.1
463 root 1.12 $extractor->add_bin ("deliantra");
464 root 1.1
465     =cut
466    
467 root 1.12 sub add_bin {
468 root 1.1 my $self = shift;
469    
470     exe:
471     for my $exe (@_) {
472     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
473     if (open my $fh, "<:perlio", "$dir/$exe") {
474 root 1.12 if (-f $fh) {
475     my $file = do { local $/; readline $fh };
476 root 1.1
477 root 1.12 $self->_trace_flush if exists $self->{trace_check};
478     $self->{trace_check} = $file;
479 root 1.1
480 root 1.12 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
481     next exe;
482     }
483 root 1.1 }
484     }
485    
486 root 1.12 croak "add_bin ($exe): executable not found";
487 root 1.1 }
488     }
489    
490     =item $extractor->add_eval ($string)
491    
492 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
493     by it. For example, this would add L<AnyEvent> and the default backend
494     implementation module and event loop module:
495 root 1.1
496     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
497    
498 root 1.5 Each code snippet will be executed in its own package and under C<use
499     strict>.
500    
501 root 1.1 =cut
502    
503     sub add_eval {
504     my ($self, $eval) = @_;
505    
506 root 1.5 my $pkg = "libextractor" . ++$self->{count};
507 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
508 root 1.3 $self->_trace ($eval,
509     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
510 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
511 root 1.3 );
512 root 1.1 }
513    
514 root 1.5 =back
515    
516     =head2 OTHER METHODS FOR ADDING FILES
517    
518     The following methods add commonly used files that are either not covered
519     by other methods or add commonly-used dependencies.
520    
521     =over 4
522    
523 root 1.1 =item $extractor->add_perl
524    
525     Adds the perl binary itself to the file set, including the libperl dll, if
526     needed.
527    
528 root 1.12 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
529     some F<dll/libperl.so.XXX>.
530 root 1.5
531 root 1.4 =cut
532    
533     sub add_perl {
534     my ($self) = @_;
535    
536 root 1.12 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
537 root 1.4
538     # on debian, we have the special case of a perl binary linked against
539     # a static libperl.a (which is not available), but the Config says to use
540     # a shared library, which is in the wrong directory, too (which breaks
541     # every other perl installation on the system - they are so stupid).
542    
543     # that means we can't find the libperl.so, because dbeian actively breaks
544     # their perl install, and we don't need it. we work around this by silently
545     # not including the libperl if we cannot find it.
546    
547     if ($Config{useshrplib} eq "true") {
548 root 1.8 my ($libperl, $libpath);
549    
550     if ($^O eq "cygwin") {
551     $libperl = $Config{libperl};
552     $libpath = "$Config{binexp}/$libperl";
553     } elsif ($^O eq "MSWin32") {
554     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
555     $libpath = "$Config{binexp}/$libperl";
556     } else {
557     $libperl = $Config{libperl};
558     $libpath = $self->{lib}{"CORE/$libperl"};
559 root 1.4 }
560 root 1.8
561 root 1.12 $self->{set}{"dll/$libperl"} = $libpath
562 root 1.8 if length $libpath && -e $libpath;
563 root 1.4 }
564     }
565 root 1.3
566     =item $extractor->add_core_support
567    
568     Try to add modules and files needed to support commonly-used builtin
569     language features. For example to open a scalar for I/O you need the
570     L<PerlIO::scalar> module:
571    
572     open $fh, "<", \$scalar
573    
574     A number of regex and string features (e.g. C<ucfirst>) need some unicore
575     files, e.g.:
576    
577     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
578    
579     This call adds these files (simply by executing code similar to the above
580     code fragments).
581    
582     Notable things that are missing are other PerlIO layers, such as
583     L<PerlIO::encoding>, and named character and character class matches.
584    
585     =cut
586    
587     sub add_core_support {
588     my ($self) = @_;
589    
590     $self->add_eval ('my $v; open my $fh, "<", \$v');
591     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
592 root 1.10 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
593     $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
594 root 1.3 }
595 root 1.1
596 root 1.3 =item $extractor->add_unicore
597    
598 root 1.11 Adds (hopefully) all files from the unicore database that will ever be
599 root 1.5 needed.
600    
601     If you are not sure which unicode character classes and similar unicore
602     databases you need, and you do not care about an extra one thousand(!)
603     files comprising 4MB of data, then you can just call this method, which
604     adds basically all files from perl's unicode database.
605    
606 root 1.11 Note that C<add_core_support> also adds some unicore files, but it's not a
607     subset of C<add_unicore> - the former adds all files neccessary to support
608     core builtins (which includes some unicore files and other things), while
609     the latter adds all unicore files (but nothing else).
610    
611     When in doubt, use both.
612    
613 root 1.5 =cut
614    
615     sub add_unicore {
616     my ($self) = @_;
617    
618     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
619     }
620    
621 root 1.11 =item $extractor->add_core
622    
623     This adds all files from the perl core distribution, that is, all library
624     files that come with perl.
625    
626     This is a superset of C<add_core_support> and C<add_unicore>.
627    
628     This is quite a lot, but on the plus side, you can be sure nothing is
629     missing.
630    
631 root 1.13 This requires a full perl installation - Debian GNU/Linux doesn't package
632     the full perl library, so this function will not work there.
633 root 1.11
634     =cut
635    
636     sub add_core {
637     my ($self) = @_;
638    
639     my $lib = $self->{lib};
640    
641     for (@{
642     $self->_read_packlist ($Config{privlibexp}, ".packlist")
643     }) {
644     $self->{set}{$_} ||= [
645 root 1.12 "lib/"
646 root 1.11 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
647     ];
648     }
649     }
650    
651 root 1.5 =back
652    
653     =head2 GLOB-BASED ADDING AND FILTERING
654 root 1.1
655 root 1.5 These methods add or manipulate files by using glob-based patterns.
656    
657     These glob patterns work similarly to glob patterns in the shell:
658    
659     =over 4
660    
661     =item /
662    
663     A F</> at the start of the pattern interprets the pattern as a file
664     path inside the file set, almost the same as in the shell. For example,
665     F</bin/perl*> would match all files whose names starting with F<perl>
666     inside the F<bin> directory in the set.
667    
668     If the F</> is missing, then the pattern is interpreted as a module name
669 root 1.12 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
670     while F<Coro::*> would match F<lib/Coro/*.pm>.
671 root 1.5
672     =item *
673    
674     A single star matches anything inside a single directory component. For
675     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
676     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
677 root 1.1
678 root 1.5 Another way to look at it is that a single star matches anything but a
679     slash (F</>).
680 root 1.1
681 root 1.5 =item **
682 root 1.1
683 root 1.5 A double star matches any number of characters in the path, including F</>.
684 root 1.1
685 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
686     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
687 root 1.1
688 root 1.5 =back
689 root 1.1
690 root 1.2 =cut
691    
692     sub _extglob2re {
693 root 1.5 for (quotemeta $_[1]) {
694 root 1.2 s/\\\*\\\*/.*/g;
695     s/\\\*/[^\/]*/g;
696     s/\\\?/[^\/]/g;
697    
698     unless (s%^\\/%%) {
699     s%\\:\\:%/%g;
700 root 1.12 $_ = "lib/$_\\.pm";
701 root 1.2 }
702    
703     $_ .= '$';
704     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
705    
706     return qr<^$_>s
707     }
708     }
709    
710 root 1.5 =over 4
711    
712     =item $extractor->add_glob ($modglob[, $modglob...])
713    
714     Adds all files from the perl library that match the given glob pattern.
715    
716     For example, you could implement C<add_unicore> yourself like this:
717    
718     $extractor->add_glob ("/unicore/**.pl");
719    
720     =cut
721    
722     sub add_glob {
723     my $self = shift;
724    
725     for (@_) {
726     my $pat = $self->_extglob2re ($_);
727     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
728     }
729     }
730    
731     =item $extractor->filter ($pattern[, $pattern...])
732    
733     Applies a series of include/exclude filters. Each filter must start with
734     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
735     pattern. The rest of the pattern is a normal glob pattern.
736    
737     An exclude pattern (C<->) instantly removes all matching files from
738     the set. An include pattern (C<+>) protects matching files from later
739     removals.
740    
741     That is, if you have an include pattern then all files that were matched
742     by it will be included in the set, regardless of any further exclude
743     patterns matching the same files.
744    
745     Likewise, any file excluded by a pattern will not be included in the set,
746     even if matched by later include patterns.
747    
748     Any files not matched by any expression will simply stay in the set.
749    
750     For example, to remove most of the useless autoload functions by the POSIX
751     module (they either do the same thing as a builtin or always raise an
752 root 1.12 error), you would use this:
753 root 1.5
754     $extractor->filter ("-/lib/auto/POSIX/*.al");
755    
756     This does not remove all autoload files, only the ones not defined by a
757     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
758    
759     =cut
760    
761 root 1.2 sub filter {
762     my ($self, @patterns) = @_;
763    
764 root 1.3 $self->_trace_flush;
765    
766     my $set = $self->{set};
767     my %include;
768 root 1.2
769     for my $pat (@patterns) {
770     $pat =~ s/^([+\-])//
771     or croak "$_: not a valid filter pattern (missing + or - prefix)";
772     my $inc = $1 eq "+";
773     $pat = $self->_extglob2re ($pat);
774 root 1.3
775     my @match = grep /$pat/, keys %$set;
776    
777     if ($inc) {
778     @include{@match} = delete @$set{@match};
779     } else {
780     delete @$set{@{ $_->[I_DEP] }} # remove dependents
781     for delete @$set{@match};
782     }
783 root 1.2 }
784 root 1.3
785     my @include = keys %include;
786     @$set{@include} = delete @include{@include};
787 root 1.2 }
788 root 1.1
789 root 1.3 =item $extractor->runtime_only
790    
791     This removes all files that are not needed at runtime, such as static
792     archives, header and other files needed only for compilation of modules,
793     and pod and html files (which are unlikely to be needed at runtime).
794 root 1.1
795 root 1.8 This is quite useful when you want to have only files actually needed to
796 root 1.3 execute a program.
797    
798     =cut
799    
800     sub runtime_only {
801     my ($self) = @_;
802    
803     $self->_trace_flush;
804    
805     my $set = $self->{set};
806    
807 root 1.8 # delete all static libraries, also windows stuff
808 root 1.12 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
809 root 1.3
810     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
811 root 1.12 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
812 root 1.3
813     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
814 root 1.12 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
815 root 1.11
816     # delete unneeded unicore files
817 root 1.12 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
818 root 1.3 }
819 root 1.1
820     =back
821    
822 root 1.5 =head2 RESULT SET
823    
824     =over 4
825 root 1.1
826 root 1.5 =item $set = $extractor->set
827 root 1.1
828 root 1.5 Returns a hash reference that represents the result set. The hash is the
829     actual internal storage hash and can only be modified as described below.
830 root 1.1
831 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
832     e.g.:
833 root 1.1
834 root 1.5 bin/perl
835     lib/unicore/lib/Blk/Superscr.pl
836     lib/AnyEvent/Impl/EV.pm
837    
838     The value is an array reference with mostly unspecified contents, except
839     the first element, which is the file system path where the actual file can
840     be found.
841    
842     This code snippet lists all files inside the set:
843    
844     print "$_\n"
845     for sort keys %{ $extractor->set });
846    
847     This code fragment prints C<< filesystem_path => set_path >> pairs for all
848     files in the set:
849    
850     my $set = $extractor->set;
851     while (my ($set,$fspath) = each %$set) {
852     print "$fspath => $set\n";
853     }
854 root 1.1
855 root 1.5 You can implement your own filtering by asking for the result set with
856     C<< $extractor->set >>, and then deleting keys from the referenced hash
857     - since you can ask for the result set at any time you can add things,
858     filter them out this way, and add additional things.
859    
860     =back
861    
862     =cut
863    
864     sub set {
865 root 1.6 $_[0]->_trace_flush;
866 root 1.5 $_[0]{set}
867     }
868    
869     =head1 EXAMPLE
870    
871     To package he deliantra client (L<Deliantra::Client>), finding all
872     (perl) files needed to run it is a first step. This can be done by using
873     something like the following code snippet:
874    
875 root 1.12 my $ex = new Perl::LibExtractor;
876 root 1.5
877     $ex->add_perl;
878     $ex->add_core_support;
879 root 1.12 $ex->add_bin ("deliantra");
880 root 1.5 $ex->add_mod ("AnyEvent::Impl::EV");
881     $ex->add_mod ("AnyEvent::Impl::Perl");
882     $ex->add_mod ("Urlader");
883     $ex->filter ("-/*/auto/POSIX/**.al");
884     $ex->runtime_only;
885    
886     First it sets the perl library directory to F<pm> and F<.> (the latter
887     to work around some AutoLoader bugs), so perl uses only the perl library
888     files that came with the binary package.
889    
890     Then it sets some environment variable to override the system default
891     (which might be incompatible).
892    
893     Then it runs the client itself, using C<require>. Since C<require> only
894     looks in the perl library directory this is the reaosn why the scripts
895     were put there (of course, since F<.> is also included it doesn't matter,
896     but I refuse to yield to bugs).
897    
898     Finally it exits with a clean status to signal "ok" to Urlader.
899    
900     Back to the original C<Perl::LibExtractor> script: after initialising a
901     new set, the script simply adds the F<perl> interpreter and core support
902     files (just in case, not all are needed, but some are, and I am too lazy
903     to find out which ones exactly).
904    
905     Then it adds the deliantra executable itself, which in turn adds most of
906     the required modules. After that, the AnyEvent implementation modules are
907     added because these dependencies are not picked up automatically.
908    
909     The L<Urlader> module is added because the client itself does not depend
910     on it at all, but the wrapper does.
911    
912     At this point, all required files are present, and it's time to slim
913     down: most of the ueseless POSIX autoloaded functions are removed,
914     not because they are so big, but because creating files is a costly
915     operation in itself, so even small fiels have considerable overhead when
916     unpacking. Then files not required for running the client are removed.
917    
918     And that concludes it, the set is now ready.
919 root 1.1
920     =head1 SEE ALSO
921    
922     The utility program that comes with this module: L<perl-libextract>.
923    
924     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
925    
926 root 1.5 =head1 LICENSE
927    
928     This software package is licensed under the GPL version 3 or any later
929     version, see COPYING for details.
930    
931     This license does not, of course, apply to any output generated by this
932     software.
933    
934 root 1.1 =head1 AUTHOR
935    
936     Marc Lehmann <schmorp@schmorp.de>
937     http://home.schmorp.de/
938    
939     =cut
940    
941     1;
942