ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.11
Committed: Tue Jan 17 21:41:41 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.10: +69 -20 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72     The following key-value pairs exist, with default values as specified.
73    
74     =over 4
75    
76     =item exedir => "bin"
77    
78 root 1.5 The prefix to use for the suggested target path for perl executables
79     (scripts). Defaults to F<bin>.
80 root 1.1
81     =item libdir => "lib"
82    
83     The prefix to use for the suggested target path of perl library
84     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
85     etc.). Defaults to F<lib>.
86    
87     =item bindir => "bin"
88    
89     The prefix to use for the suggested target path for (non-perl)
90     executables. Defaults to F<bin>.
91    
92     =item dlldir => "bin"
93    
94     The prefix to use for the suggested target path of any shared
95     libraries. Defaults to F<bin>.
96    
97     =item inc => \@INC without "."
98    
99     An arrayref with paths to perl library directories. The default is
100     C<\@INC>, with F<.> removed.
101    
102     To prepend custom dirs just do this:
103    
104     inc => ["mydir", @INC],
105    
106 root 1.3 =item use_packlist => 1
107 root 1.1
108 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
109 root 1.5 then each time a file is traced, the complete distribution that contains
110     it is included (but not traced).
111    
112     If disabled, only shared objects and autoload files will be added.
113 root 1.1
114     =back
115    
116     =cut
117    
118     sub new {
119     my ($class, %kv) = @_;
120    
121     my $self = bless {
122     exedir => "bin",
123     libdir => "lib",
124     bindir => "bin",
125     dlldir => "bin",
126     inc => [grep $_ ne ".", @INC],
127 root 1.3 use_packlist => 1,
128 root 1.1 %kv,
129     set => {},
130     }, $class;
131    
132     my %inc_seen;
133 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
134 root 1.1 $self->{inc} = \@inc;
135    
136     $self->_set_inc;
137    
138     $self
139     }
140    
141 root 1.4 sub _perl_path() {
142     my $secure_perl_path = $Config{perlpath};
143    
144     if ($^O ne 'VMS') {
145     $secure_perl_path .= $Config{_exe}
146     unless $secure_perl_path =~ m/$Config{_exe}$/i;
147     }
148    
149     $secure_perl_path
150     }
151    
152 root 1.8 sub _path2match {
153     my $re = join "|", map "\Q$_", @_;
154    
155     $re = "(?:$re)\\/";
156     $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
157    
158     $case_tolerant
159     ? qr<$re>i
160     : qr<$re>
161     }
162    
163 root 1.11 sub _read_packlist {
164     my ($self, $root, $path) = @_;
165    
166     my $lib = $self->{lib};
167    
168     my @packlist;
169    
170     open my $fh, "<:perlio", "$root/$path"
171     or die "$root/$path: $!";
172    
173     $root = _path2match $root;
174    
175     while (<$fh>) {
176     chomp;
177     s/ .*$//; # newer-style .packlists might contain key=value pairs
178    
179     s/$root// and exists $lib->{$_}
180     or next;
181    
182     push @packlist, canonpath $_;
183     }
184    
185     \@packlist
186     }
187    
188 root 1.1 sub _set_inc {
189     my ($self) = @_;
190    
191 root 1.8 my $matchprefix = _path2match @{ $self->{inc }};
192 root 1.2
193 root 1.1 my %lib;
194 root 1.2 my @packlists;
195 root 1.1
196     # find all files in all libdirs, earlier ones overwrite later ones
197 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
198    
199     while (@scan) {
200     my ($root, $dir) = @{ pop @scan };
201    
202     my $pfx = length $dir ? "$dir/" : "";
203    
204     for (do {
205     opendir my $fh, "$root/$dir"
206     or croak "$root/$dir: $!";
207     grep !/^\.\.?$/, readdir $fh
208     }) {
209     if (-d "$root/$dir/$_/.") {
210     $lib{"$pfx$_/"} = "$root/$pfx$_";
211     push @scan, [$root, "$pfx$_"];
212     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
213     push @packlists, [$root, $pfx];
214     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
215     # skip empty .bs files
216     # } elsif (/\.(?:pod|h|html)$/) {
217     # # not interested in those
218     } else {
219     #push @files, $_;
220     $lib{"$pfx$_"} = "$root/$pfx$_";
221 root 1.1 }
222 root 1.2 }
223    
224     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
225     }
226    
227     my %packlist;
228 root 1.1
229 root 1.2 # need to go forward here
230     for (@packlists) {
231 root 1.11 my $packlist = $self->_read_packlist ($_->[0], "$_->[1]/.packlist");
232 root 1.2
233 root 1.11 $packlist{$_} = $packlist
234     for @$packlist;
235 root 1.1 }
236    
237 root 1.8 $self->{lib} = \%lib;
238     $self->{packlist} = \%packlist;
239 root 1.2 $self->{matchprefix} = $matchprefix;
240 root 1.1 }
241    
242 root 1.5 =back
243    
244     =head2 TRACE/PACKLIST BASED ADDING
245    
246     The following methods add various things to the set of files.
247    
248     Each time a perl file is added, it is scanned by tracing either loading,
249     execution or compiling it, and seeing which other perl modules and
250     libraries have been loaded.
251    
252     For each library file found this way, additional dependencies are added:
253     if packlists are enabled, then all files of the distribution that contains
254     the file will be added. If packlists are disabled, then only shared
255     objects and autoload files for modules will be added.
256    
257     Only files from perl library directories will be added automatically. Any
258     other files (such as manpages or scripts installed in the F<bin>
259     directory) are skipped.
260    
261     If there is an error, such as a module not being found, then this module
262     croaks (as opposed to silently skipping). If you want to add something of
263     which you are not sure it exists, then you can wrap the call into C<eval
264     {}>. In some cases, you can avoid this by executing the code you want
265     to work later using C<add_eval> - see C<add_core_support> for an actual
266     example of this technique.
267    
268     Note that packlists are meant to add files not covered by other
269     mechanisms, such as resource files and other data files loaded directly by
270     a module - they are not meant to add dependencies that are missed because
271     they only happen at runtime.
272    
273     For example, with packlists, when using L<AnyEvent>, then all event loop
274     backends are automatically added as well, but I<not> any event loops
275     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
276     packlists, only the backend that is being used is added (i.e. normally
277     none, as loading AnyEvent does not instantly load any backend).
278    
279     To catch the extra event loop dependencies, you can either initialise
280     AnyEvent so it picks a suitable backend:
281    
282     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
283    
284     Or you can directly load the backend modules you plan to use:
285    
286     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
287    
288     An example of a program (or module) that has extra resource files is
289     L<Deliantra::Client> - the normal tracing (without packlist usage) will
290     correctly add all submodules, but miss the fonts and textures. By using
291     the packlist, those files are added correctly.
292    
293     =over 4
294    
295     =cut
296    
297 root 1.1 sub _add {
298     my ($self, $add) = @_;
299    
300     my $lib = $self->{lib};
301     my $path;
302    
303     for (@$add) {
304     $path = "$self->{libdir}/$_";
305    
306     $self->{set}{$path} ||= do {
307     my @info;
308    
309     $info[I_SRC] = $lib->{$_}
310     or croak "$_: unable to locate file in perl library";
311    
312 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
313 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
314     for @{ $self->{packlist}{$_} };
315 root 1.3
316     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
317     # s/\.pm$//;
318     # s%/%::%g;
319     # my $pkg = "libextractor" . ++$self->{count};
320     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
321     # unless $self->{_add_do}{$_}++;
322     # }
323     #
324     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
325     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
326    
327 root 1.2 } elsif (/^(.*)\.pm$/) {
328 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
329     $auto =~ m%/([^/]+)/$% or die;
330     my $base = $1;
331    
332     if (exists $lib->{$auto}) {
333     # auto dir exists, scan it for cool stuff
334    
335     # 1. shared object, others are of no interest to us
336     my $so = "$auto$base.$Config{dlext}";
337     if (my $src = $lib->{$so}) {
338     $so = "$self->{libdir}/$so";
339     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
340     }
341    
342     # 2. autoloader/autosplit
343     my $ix = "${auto}autosplit.ix";
344     if (my $src = $lib->{$ix}) {
345     $ix = "$self->{libdir}/$ix";
346     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
347    
348     open my $fh, "<:perlio", $src
349     or croak "$src: $!";
350    
351     my $package;
352    
353     while (<$fh>) {
354     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
355     my $al = "auto/$package/$1.al";
356     my $src = $lib->{$al}
357     or croak "$al: autoload file not found, but should be there.";
358    
359     $al = "$self->{libdir}/$al";
360     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
361    
362     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
363     ($package = $1) =~ s/::/\//g;
364     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
365     # nop
366     } else {
367     warn "WARNING: $src: unparsable line, please report: $_";
368     }
369     }
370     }
371    
372     skip:
373     }
374     }
375    
376     \@info
377     };
378     }
379     }
380    
381     sub _trace {
382     my ($self, $file, $eval) = @_;
383    
384 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
385 root 1.1 }
386    
387     sub _trace_flush {
388     my ($self) = @_;
389    
390 root 1.3 # ->_add might add additional files to trace
391     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
392     my $tmpdir = newdir File::Temp;
393     my $dir = $tmpdir->dirname;
394    
395     open my $fh, ">:perlio", "$dir/eval"
396     or croak "$dir/eval: $!";
397     syswrite $fh,
398     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
399     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
400     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
401     . "CHECK {\n"
402     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
403     . 'print join "\x00", values %INC;'
404     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
405     . "}\n"
406     . (delete $self->{trace_check});
407     close $fh;
408    
409 root 1.4 system _perl_path, "-c", "$dir/eval"
410 root 1.3 and croak "trace failure, check trace process output - caught";
411 root 1.1
412 root 1.3 my @inc = split /\x00/, do {
413     open my $fh, "<:perlio", "$dir/out"
414     or croak "$dir/out: $!";
415     local $/;
416     scalar readline $fh
417     };
418 root 1.1
419 root 1.3 my $matchprefix = $self->{matchprefix};
420 root 1.1
421 root 1.3 # remove the library directory prefix, hope for the best
422     s/$matchprefix//
423     or croak "$_: file outside any library directory"
424     for @inc;
425 root 1.1
426 root 1.3 $self->_add (\@inc);
427     }
428 root 1.1 }
429    
430     =item $extractor->add_mod ($module[, $module...])
431    
432     Adds the given module(s) to the file set - the module name must be specified
433     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
434    
435     The program will be loaded with the default import list, any dependent
436     files, such as the shared object implementing xs functions, or autoload
437     files, will also be added.
438    
439 root 1.5 If you want to use a different import list (for those rare modules wghere
440     import lists trigger different backend modules to be loaded for example),
441     you can use C<add_eval> instead:
442    
443     $extractor->add_eval ("use Module qw(a b c)");
444    
445 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
446     from the distribution they are part of.
447    
448     $extractor->add_mod ("Coro", "AnyEvent::AIO");
449    
450     =cut
451    
452     sub add_mod {
453     my $self = shift;
454    
455 root 1.3 for (@_) {
456     my $pkg = "libextractor" . ++$self->{count};
457     $self->_trace ("use $_", "{ package $pkg; use $_ }")
458     unless $self->{add_mod}{$_}++;
459     }
460 root 1.1 }
461    
462 root 1.5 =item $extractor->add_script ($name[, $name...])
463 root 1.1
464     Adds the given (perl) program(s) to the file set, that is, a program
465     installed by some perl module, written in perl (an example would be the
466     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
467     distribution).
468    
469 root 1.5 Example: add the deliantra client program installed by the
470     L<Deliantra::Client> module.
471 root 1.1
472 root 1.5 $extractor->add_script ("deliantra");
473 root 1.1
474     =cut
475    
476 root 1.5 sub add_script {
477 root 1.1 my $self = shift;
478    
479     exe:
480     for my $exe (@_) {
481     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
482     if (open my $fh, "<:perlio", "$dir/$exe") {
483    
484     my $file = do { local $/; readline $fh };
485    
486     $self->_trace_flush if exists $self->{trace_check};
487     $self->{trace_check} = $file;
488    
489 root 1.7 $self->{set}{"$self->{bindir}/$exe"} = ["$dir/$exe"];
490 root 1.1 next exe;
491     }
492     }
493    
494 root 1.5 croak "add_script ($exe): executable not found";
495 root 1.1 }
496     }
497    
498     =item $extractor->add_eval ($string)
499    
500 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
501     by it. For example, this would add L<AnyEvent> and the default backend
502     implementation module and event loop module:
503 root 1.1
504     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
505    
506 root 1.5 Each code snippet will be executed in its own package and under C<use
507     strict>.
508    
509 root 1.1 =cut
510    
511     sub add_eval {
512     my ($self, $eval) = @_;
513    
514 root 1.5 my $pkg = "libextractor" . ++$self->{count};
515 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
516 root 1.3 $self->_trace ($eval,
517     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
518 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
519 root 1.3 );
520 root 1.1 }
521    
522 root 1.5 =back
523    
524     =head2 OTHER METHODS FOR ADDING FILES
525    
526     The following methods add commonly used files that are either not covered
527     by other methods or add commonly-used dependencies.
528    
529     =over 4
530    
531 root 1.1 =item $extractor->add_perl
532    
533     Adds the perl binary itself to the file set, including the libperl dll, if
534     needed.
535    
536 root 1.5 For example, on UNIX systems, this usually adds a F<bin/perl> and possibly
537     some F<lib/libperl.so.XXX>.
538    
539 root 1.4 =cut
540    
541     sub add_perl {
542     my ($self) = @_;
543    
544 root 1.7 $self->{set}{"$self->{exedir}/perl$Config{_exe}"} = [_perl_path];
545 root 1.4
546     # on debian, we have the special case of a perl binary linked against
547     # a static libperl.a (which is not available), but the Config says to use
548     # a shared library, which is in the wrong directory, too (which breaks
549     # every other perl installation on the system - they are so stupid).
550    
551     # that means we can't find the libperl.so, because dbeian actively breaks
552     # their perl install, and we don't need it. we work around this by silently
553     # not including the libperl if we cannot find it.
554    
555     if ($Config{useshrplib} eq "true") {
556 root 1.8 my ($libperl, $libpath);
557    
558     if ($^O eq "cygwin") {
559     $libperl = $Config{libperl};
560     $libpath = "$Config{binexp}/$libperl";
561     } elsif ($^O eq "MSWin32") {
562     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
563     $libpath = "$Config{binexp}/$libperl";
564     } else {
565     $libperl = $Config{libperl};
566     $libpath = $self->{lib}{"CORE/$libperl"};
567 root 1.4 }
568 root 1.8
569     $self->{set}{"$self->{dlldir}/$libperl"} = $libpath
570     if length $libpath && -e $libpath;
571 root 1.4 }
572     }
573 root 1.3
574     =item $extractor->add_core_support
575    
576     Try to add modules and files needed to support commonly-used builtin
577     language features. For example to open a scalar for I/O you need the
578     L<PerlIO::scalar> module:
579    
580     open $fh, "<", \$scalar
581    
582     A number of regex and string features (e.g. C<ucfirst>) need some unicore
583     files, e.g.:
584    
585     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
586    
587     This call adds these files (simply by executing code similar to the above
588     code fragments).
589    
590     Notable things that are missing are other PerlIO layers, such as
591     L<PerlIO::encoding>, and named character and character class matches.
592    
593     =cut
594    
595     sub add_core_support {
596     my ($self) = @_;
597    
598     $self->add_eval ('my $v; open my $fh, "<", \$v');
599     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
600 root 1.10 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
601     $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
602 root 1.3 }
603 root 1.1
604 root 1.3 =item $extractor->add_unicore
605    
606 root 1.11 Adds (hopefully) all files from the unicore database that will ever be
607 root 1.5 needed.
608    
609     If you are not sure which unicode character classes and similar unicore
610     databases you need, and you do not care about an extra one thousand(!)
611     files comprising 4MB of data, then you can just call this method, which
612     adds basically all files from perl's unicode database.
613    
614 root 1.11 Note that C<add_core_support> also adds some unicore files, but it's not a
615     subset of C<add_unicore> - the former adds all files neccessary to support
616     core builtins (which includes some unicore files and other things), while
617     the latter adds all unicore files (but nothing else).
618    
619     When in doubt, use both.
620    
621 root 1.5 =cut
622    
623     sub add_unicore {
624     my ($self) = @_;
625    
626     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
627     }
628    
629 root 1.11 =item $extractor->add_core
630    
631     This adds all files from the perl core distribution, that is, all library
632     files that come with perl.
633    
634     This is a superset of C<add_core_support> and C<add_unicore>.
635    
636     This is quite a lot, but on the plus side, you can be sure nothing is
637     missing.
638    
639     This requires a full perl installation - many distributions (Debian
640     GNU/Linux for example) don't package all library files that perl installs.
641    
642     =cut
643    
644     sub add_core {
645     my ($self) = @_;
646    
647     my $lib = $self->{lib};
648    
649     for (@{
650     $self->_read_packlist ($Config{privlibexp}, ".packlist")
651     }) {
652     $self->{set}{$_} ||= [
653     "$self->{libdir}/"
654     . ($lib->{$_} or croak "$_: unable to locate file in perl library")
655     ];
656     }
657     }
658    
659 root 1.5 =back
660    
661     =head2 GLOB-BASED ADDING AND FILTERING
662 root 1.1
663 root 1.5 These methods add or manipulate files by using glob-based patterns.
664    
665     These glob patterns work similarly to glob patterns in the shell:
666    
667     =over 4
668    
669     =item /
670    
671     A F</> at the start of the pattern interprets the pattern as a file
672     path inside the file set, almost the same as in the shell. For example,
673     F</bin/perl*> would match all files whose names starting with F<perl>
674     inside the F<bin> directory in the set.
675    
676     If the F</> is missing, then the pattern is interpreted as a module name
677     (a F<.pm> file). For example, F<Coro> matches the file F<libdir/Coro.pm>
678     (where F<libdir> is the perl library directory), while F<Coro::*> would
679     match F<libdir/Coro/*.pm>.
680    
681     =item *
682    
683     A single star matches anything inside a single directory component. For
684     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
685     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
686 root 1.1
687 root 1.5 Another way to look at it is that a single star matches anything but a
688     slash (F</>).
689 root 1.1
690 root 1.5 =item **
691 root 1.1
692 root 1.5 A double star matches any number of characters in the path, including F</>.
693 root 1.1
694 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
695     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
696 root 1.1
697 root 1.5 =back
698 root 1.1
699 root 1.2 =cut
700    
701     sub _extglob2re {
702 root 1.5 for (quotemeta $_[1]) {
703 root 1.2 s/\\\*\\\*/.*/g;
704     s/\\\*/[^\/]*/g;
705     s/\\\?/[^\/]/g;
706    
707     unless (s%^\\/%%) {
708     s%\\:\\:%/%g;
709 root 1.5 $_ = (quotemeta $_[0]{libdir}) . "/$_\\.pm";
710 root 1.2 }
711    
712     $_ .= '$';
713     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
714    
715     return qr<^$_>s
716     }
717     }
718    
719 root 1.5 =over 4
720    
721     =item $extractor->add_glob ($modglob[, $modglob...])
722    
723     Adds all files from the perl library that match the given glob pattern.
724    
725     For example, you could implement C<add_unicore> yourself like this:
726    
727     $extractor->add_glob ("/unicore/**.pl");
728    
729     =cut
730    
731     sub add_glob {
732     my $self = shift;
733    
734     for (@_) {
735     my $pat = $self->_extglob2re ($_);
736     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
737     }
738     }
739    
740     =item $extractor->filter ($pattern[, $pattern...])
741    
742     Applies a series of include/exclude filters. Each filter must start with
743     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
744     pattern. The rest of the pattern is a normal glob pattern.
745    
746     An exclude pattern (C<->) instantly removes all matching files from
747     the set. An include pattern (C<+>) protects matching files from later
748     removals.
749    
750     That is, if you have an include pattern then all files that were matched
751     by it will be included in the set, regardless of any further exclude
752     patterns matching the same files.
753    
754     Likewise, any file excluded by a pattern will not be included in the set,
755     even if matched by later include patterns.
756    
757     Any files not matched by any expression will simply stay in the set.
758    
759     For example, to remove most of the useless autoload functions by the POSIX
760     module (they either do the same thing as a builtin or always raise an
761     error), you would use this (assuming a default C<libdir>):
762    
763     $extractor->filter ("-/lib/auto/POSIX/*.al");
764    
765     This does not remove all autoload files, only the ones not defined by a
766     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
767    
768     =cut
769    
770 root 1.2 sub filter {
771     my ($self, @patterns) = @_;
772    
773 root 1.3 $self->_trace_flush;
774    
775     my $set = $self->{set};
776     my %include;
777 root 1.2
778     for my $pat (@patterns) {
779     $pat =~ s/^([+\-])//
780     or croak "$_: not a valid filter pattern (missing + or - prefix)";
781     my $inc = $1 eq "+";
782     $pat = $self->_extglob2re ($pat);
783 root 1.3
784     my @match = grep /$pat/, keys %$set;
785    
786     if ($inc) {
787     @include{@match} = delete @$set{@match};
788     } else {
789     delete @$set{@{ $_->[I_DEP] }} # remove dependents
790     for delete @$set{@match};
791     }
792 root 1.2 }
793 root 1.3
794     my @include = keys %include;
795     @$set{@include} = delete @include{@include};
796 root 1.2 }
797 root 1.1
798 root 1.3 =item $extractor->runtime_only
799    
800     This removes all files that are not needed at runtime, such as static
801     archives, header and other files needed only for compilation of modules,
802     and pod and html files (which are unlikely to be needed at runtime).
803 root 1.1
804 root 1.8 This is quite useful when you want to have only files actually needed to
805 root 1.3 execute a program.
806    
807     =cut
808    
809     sub runtime_only {
810     my ($self) = @_;
811    
812     $self->_trace_flush;
813    
814     my $set = $self->{set};
815    
816 root 1.8 # delete all static libraries, also windows stuff
817     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
818 root 1.3
819     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
820     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
821    
822     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
823 root 1.8 delete @$set{ grep m%^\Q$self->{libdir}\E/.*\.(?:pod|h|html)$%s, keys %$set };
824 root 1.11
825     # delete unneeded unicore files
826     delete @$set{ grep m%^\Q$self->{libdir}\E/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
827 root 1.3 }
828 root 1.1
829     =back
830    
831 root 1.5 =head2 RESULT SET
832    
833     =over 4
834 root 1.1
835 root 1.5 =item $set = $extractor->set
836 root 1.1
837 root 1.5 Returns a hash reference that represents the result set. The hash is the
838     actual internal storage hash and can only be modified as described below.
839 root 1.1
840 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
841     e.g.:
842 root 1.1
843 root 1.5 bin/perl
844     lib/unicore/lib/Blk/Superscr.pl
845     lib/AnyEvent/Impl/EV.pm
846    
847     The value is an array reference with mostly unspecified contents, except
848     the first element, which is the file system path where the actual file can
849     be found.
850    
851     This code snippet lists all files inside the set:
852    
853     print "$_\n"
854     for sort keys %{ $extractor->set });
855    
856     This code fragment prints C<< filesystem_path => set_path >> pairs for all
857     files in the set:
858    
859     my $set = $extractor->set;
860     while (my ($set,$fspath) = each %$set) {
861     print "$fspath => $set\n";
862     }
863 root 1.1
864 root 1.5 You can implement your own filtering by asking for the result set with
865     C<< $extractor->set >>, and then deleting keys from the referenced hash
866     - since you can ask for the result set at any time you can add things,
867     filter them out this way, and add additional things.
868    
869     =back
870    
871     =cut
872    
873     sub set {
874 root 1.6 $_[0]->_trace_flush;
875 root 1.5 $_[0]{set}
876     }
877    
878     =head1 EXAMPLE
879    
880     To package he deliantra client (L<Deliantra::Client>), finding all
881     (perl) files needed to run it is a first step. This can be done by using
882     something like the following code snippet:
883    
884     my $ex = new Perl::LibExtractor
885     exedir => ".", dlldir => ".",
886     libdir => "pm", bindir => "pm/bin";
887    
888     $ex->add_perl;
889     $ex->add_core_support;
890     $ex->add_script ("deliantra");
891     $ex->add_mod ("AnyEvent::Impl::EV");
892     $ex->add_mod ("AnyEvent::Impl::Perl");
893     $ex->add_mod ("Urlader");
894     $ex->filter ("-/*/auto/POSIX/**.al");
895     $ex->runtime_only;
896    
897     Let's first find out about the choice of paths for the subset. The
898     Deliantra client binary packages use L<Urlader> nowadays, and there it is
899     convenient to have F<perl> and any shared libraries directly in the root
900     of the distribution.
901    
902     The perl library files are put into a directory named F<pm>, simply
903     because it's shorter than F<lib>, and in the future, some files might go
904     into F<lib>.
905    
906     And finally, the F<deliantra> script itself is put into the perl library
907     directory, because it is not run directly - the installed client uses the
908     system fonts and other resources, while the binary package is supposed
909     to use the files packaged with it. To achieve this, a wrapper script is
910     created, called F<run>; which displays a splash screen and configures the
911     environment. A simplified version of it could look like this:
912    
913     @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
914     $ENV{PANGO_RC_FILE} = "pango.rc";
915     require "bin/deliantra";
916     exit 0;
917    
918     First it sets the perl library directory to F<pm> and F<.> (the latter
919     to work around some AutoLoader bugs), so perl uses only the perl library
920     files that came with the binary package.
921    
922     Then it sets some environment variable to override the system default
923     (which might be incompatible).
924    
925     Then it runs the client itself, using C<require>. Since C<require> only
926     looks in the perl library directory this is the reaosn why the scripts
927     were put there (of course, since F<.> is also included it doesn't matter,
928     but I refuse to yield to bugs).
929    
930     Finally it exits with a clean status to signal "ok" to Urlader.
931    
932     Back to the original C<Perl::LibExtractor> script: after initialising a
933     new set, the script simply adds the F<perl> interpreter and core support
934     files (just in case, not all are needed, but some are, and I am too lazy
935     to find out which ones exactly).
936    
937     Then it adds the deliantra executable itself, which in turn adds most of
938     the required modules. After that, the AnyEvent implementation modules are
939     added because these dependencies are not picked up automatically.
940    
941     The L<Urlader> module is added because the client itself does not depend
942     on it at all, but the wrapper does.
943    
944     At this point, all required files are present, and it's time to slim
945     down: most of the ueseless POSIX autoloaded functions are removed,
946     not because they are so big, but because creating files is a costly
947     operation in itself, so even small fiels have considerable overhead when
948     unpacking. Then files not required for running the client are removed.
949    
950     And that concludes it, the set is now ready.
951 root 1.1
952     =head1 SEE ALSO
953    
954     The utility program that comes with this module: L<perl-libextract>.
955    
956     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
957    
958 root 1.5 =head1 LICENSE
959    
960     This software package is licensed under the GPL version 3 or any later
961     version, see COPYING for details.
962    
963     This license does not, of course, apply to any output generated by this
964     software.
965    
966 root 1.1 =head1 AUTHOR
967    
968     Marc Lehmann <schmorp@schmorp.de>
969     http://home.schmorp.de/
970    
971     =cut
972    
973     1;
974