ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.25
Committed: Mon Sep 23 07:57:55 2013 UTC (10 years, 9 months ago) by root
Branch: MAIN
Changes since 1.24: +11 -4 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27 root 1.23 The command-line utility F<perl-libextractor> can be a convenient
28 root 1.2 alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36 root 1.22 our $VERSION = '1.1';
37 root 1.1
38     use Config;
39 root 1.25 use Cwd ();
40 root 1.7 use File::Spec ();
41 root 1.1 use File::Temp ();
42    
43     use common::sense;
44    
45     sub I_SRC () { 0 }
46     sub I_DEP () { 1 }
47    
48     sub croak($) {
49     require Carp;
50     Carp::croak "(Perl::LibExtractor) $_[0]";
51     }
52    
53 root 1.8 my $canonpath = File::Spec->can ("canonpath");
54     my $case_tolerant = File::Spec->case_tolerant;
55 root 1.7
56     sub canonpath($) {
57     local $_ = $canonpath->(File::Spec::, $_[0]);
58     s%\\%/%g;
59 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
60 root 1.7 $_
61     }
62    
63 root 1.5 =head2 CREATION
64    
65     =over 4
66    
67 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
68    
69     Creates a new extractor object. Each extractor object stores some
70     configuration options and a subset of files that can be queried at any
71     time,.
72    
73 root 1.12 Binary executables (such as the perl interpreter) are stored inside
74     F<bin/>, perl scripts are stored under F<script/>, perl library files are
75     stored under F<lib/> and shared libraries are stored under F<dll/>.
76    
77 root 1.1 The following key-value pairs exist, with default values as specified.
78    
79     =over 4
80    
81 root 1.25 =item inc => \@INC without any initial "."
82 root 1.1
83     An arrayref with paths to perl library directories. The default is
84 root 1.25 C<\@INC>, with the first entry removed if it is F<.>. Internally, all
85     paths will be changed to absolute paths.
86 root 1.1
87     To prepend custom dirs just do this:
88    
89     inc => ["mydir", @INC],
90    
91 root 1.3 =item use_packlist => 1
92 root 1.1
93 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
94 root 1.5 then each time a file is traced, the complete distribution that contains
95     it is included (but not traced).
96    
97     If disabled, only shared objects and autoload files will be added.
98 root 1.1
99 root 1.13 Debian GNU/Linux doesn't completely package perl or any perl modules, so
100     this option will fail. Other perls should be fine.
101    
102 root 1.16 =item extra_deps => { file => [files...] }
103    
104     Some (mainly runtime dependencies in the perl core library) cannot be
105     detected automatically by this module, especially if you don't use
106     packlists and C<add_core>.
107    
108     This module comes with a set of default dependencies (such as L<Carp>
109     requiring L<Carp::Heavy>), which you cna override with this parameter.
110    
111     To see the default set of dependencies that come with this module, use
112     this:
113    
114     perl -MPerl::LibExtractor -MData::Dumper -e 'print Dumper $Perl::LibExtractor::EXTRA_DEPS'
115    
116 root 1.1 =back
117    
118     =cut
119    
120 root 1.16 our $EXTRA_DEPS = {
121 root 1.17 'bytes.pm' => ['bytes_heavy.pl'],
122     'utf8.pm' => ['utf8_heavy.pl'],
123     'Config.pm' => ['Config_heavy.pl', 'Config_git.pl'],
124     'Carp.pm' => ['Carp/Heavy.pm'],
125     'Exporter.pm' => ['Exporter/Heavy.pm'],
126 root 1.16 };
127    
128 root 1.1 sub new {
129     my ($class, %kv) = @_;
130    
131     my $self = bless {
132 root 1.3 use_packlist => 1,
133 root 1.16 extra_deps => $EXTRA_DEPS,
134 root 1.1 %kv,
135 root 1.16 set => {},
136 root 1.1 }, $class;
137    
138 root 1.25 unless (exists $self->{inc}) {
139     my @inc = @INC;
140     shift @inc if $inc[0] eq ".";
141     $self->{inc} = \@INC;
142     }
143    
144 root 1.1 my %inc_seen;
145 root 1.25 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", map Cwd::abs_path ($_), @{ $self->{inc} };
146 root 1.1 $self->{inc} = \@inc;
147    
148 root 1.14 # maybe not inc, but these?
149     # sitearchexp
150     # sitelib
151     # vendorarchexp
152     # vendorlibexp
153     # archlibexp
154     # privlibexp
155    
156 root 1.1 $self->_set_inc;
157    
158     $self
159     }
160    
161 root 1.4 sub _perl_path() {
162     my $secure_perl_path = $Config{perlpath};
163    
164     if ($^O ne 'VMS') {
165     $secure_perl_path .= $Config{_exe}
166     unless $secure_perl_path =~ m/$Config{_exe}$/i;
167     }
168    
169     $secure_perl_path
170     }
171    
172 root 1.8 sub _path2match {
173     my $re = join "|", map "\Q$_", @_;
174    
175 root 1.19 $re = "^(?:$re)\\/";
176 root 1.8 $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
177    
178     $case_tolerant
179     ? qr<$re>i
180     : qr<$re>
181     }
182    
183 root 1.11 sub _read_packlist {
184 root 1.15 my ($self, $path) = @_;
185 root 1.11
186 root 1.15 my $pfxmatch = $self->{pfxmatch};
187     my $lib = $self->{lib};
188 root 1.11
189     my @packlist;
190    
191 root 1.15 open my $fh, "<:perlio", $path
192     or die "$path: $!";
193 root 1.11
194     while (<$fh>) {
195     chomp;
196     s/ .*$//; # newer-style .packlists might contain key=value pairs
197    
198 root 1.19 s%\\%/%g; # we only do unix-style paths internally
199    
200 root 1.15 s/$pfxmatch// and exists $lib->{$_}
201 root 1.11 or next;
202    
203     push @packlist, canonpath $_;
204     }
205    
206     \@packlist
207     }
208    
209 root 1.1 sub _set_inc {
210     my ($self) = @_;
211    
212 root 1.15 my $pfxmatch = _path2match @{ $self->{inc }};
213 root 1.2
214 root 1.1 my %lib;
215 root 1.2 my @packlists;
216 root 1.1
217     # find all files in all libdirs, earlier ones overwrite later ones
218 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
219    
220     while (@scan) {
221     my ($root, $dir) = @{ pop @scan };
222    
223     my $pfx = length $dir ? "$dir/" : "";
224    
225     for (do {
226     opendir my $fh, "$root/$dir"
227     or croak "$root/$dir: $!";
228     grep !/^\.\.?$/, readdir $fh
229     }) {
230     if (-d "$root/$dir/$_/.") {
231     $lib{"$pfx$_/"} = "$root/$pfx$_";
232     push @scan, [$root, "$pfx$_"];
233     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
234 root 1.15 push @packlists, "$root/$pfx.packlist";
235 root 1.2 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
236     # skip empty .bs files
237     # } elsif (/\.(?:pod|h|html)$/) {
238     # # not interested in those
239     } else {
240     #push @files, $_;
241     $lib{"$pfx$_"} = "$root/$pfx$_";
242 root 1.1 }
243 root 1.2 }
244    
245     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
246     }
247    
248 root 1.15 $self->{lib} = \%lib;
249     $self->{pfxmatch} = $pfxmatch;
250 root 1.14
251 root 1.2 my %packlist;
252 root 1.1
253 root 1.2 # need to go forward here
254     for (@packlists) {
255 root 1.15 my $packlist = $self->_read_packlist ($_);
256 root 1.2
257 root 1.11 $packlist{$_} = $packlist
258     for @$packlist;
259 root 1.1 }
260    
261 root 1.14 $self->{packlist} = \%packlist;
262 root 1.1 }
263    
264 root 1.5 =back
265    
266     =head2 TRACE/PACKLIST BASED ADDING
267    
268     The following methods add various things to the set of files.
269    
270     Each time a perl file is added, it is scanned by tracing either loading,
271     execution or compiling it, and seeing which other perl modules and
272     libraries have been loaded.
273    
274     For each library file found this way, additional dependencies are added:
275     if packlists are enabled, then all files of the distribution that contains
276     the file will be added. If packlists are disabled, then only shared
277     objects and autoload files for modules will be added.
278    
279     Only files from perl library directories will be added automatically. Any
280     other files (such as manpages or scripts installed in the F<bin>
281     directory) are skipped.
282    
283     If there is an error, such as a module not being found, then this module
284     croaks (as opposed to silently skipping). If you want to add something of
285     which you are not sure it exists, then you can wrap the call into C<eval
286     {}>. In some cases, you can avoid this by executing the code you want
287     to work later using C<add_eval> - see C<add_core_support> for an actual
288     example of this technique.
289    
290     Note that packlists are meant to add files not covered by other
291     mechanisms, such as resource files and other data files loaded directly by
292     a module - they are not meant to add dependencies that are missed because
293     they only happen at runtime.
294    
295     For example, with packlists, when using L<AnyEvent>, then all event loop
296     backends are automatically added as well, but I<not> any event loops
297     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
298     packlists, only the backend that is being used is added (i.e. normally
299     none, as loading AnyEvent does not instantly load any backend).
300    
301     To catch the extra event loop dependencies, you can either initialise
302     AnyEvent so it picks a suitable backend:
303    
304     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
305    
306     Or you can directly load the backend modules you plan to use:
307    
308     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
309    
310     An example of a program (or module) that has extra resource files is
311     L<Deliantra::Client> - the normal tracing (without packlist usage) will
312     correctly add all submodules, but miss the fonts and textures. By using
313     the packlist, those files are added correctly.
314    
315     =over 4
316    
317     =cut
318    
319 root 1.1 sub _add {
320     my ($self, $add) = @_;
321    
322     my $lib = $self->{lib};
323     my $path;
324    
325     for (@$add) {
326 root 1.12 $path = "lib/$_";
327 root 1.1
328     $self->{set}{$path} ||= do {
329     my @info;
330    
331     $info[I_SRC] = $lib->{$_}
332     or croak "$_: unable to locate file in perl library";
333    
334 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
335 root 1.12 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
336 root 1.2 for @{ $self->{packlist}{$_} };
337 root 1.3
338     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
339     # s/\.pm$//;
340     # s%/%::%g;
341     # my $pkg = "libextractor" . ++$self->{count};
342     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
343     # unless $self->{_add_do}{$_}++;
344     # }
345     #
346     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
347     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
348    
349 root 1.2 } elsif (/^(.*)\.pm$/) {
350 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
351     $auto =~ m%/([^/]+)/$% or die;
352     my $base = $1;
353    
354     if (exists $lib->{$auto}) {
355     # auto dir exists, scan it for cool stuff
356    
357     # 1. shared object, others are of no interest to us
358     my $so = "$auto$base.$Config{dlext}";
359     if (my $src = $lib->{$so}) {
360 root 1.12 $so = "lib/$so";
361 root 1.1 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
362     }
363    
364     # 2. autoloader/autosplit
365     my $ix = "${auto}autosplit.ix";
366     if (my $src = $lib->{$ix}) {
367 root 1.12 $ix = "lib/$ix";
368 root 1.1 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
369    
370     open my $fh, "<:perlio", $src
371     or croak "$src: $!";
372    
373     my $package;
374    
375     while (<$fh>) {
376     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
377     my $al = "auto/$package/$1.al";
378     my $src = $lib->{$al}
379     or croak "$al: autoload file not found, but should be there.";
380    
381 root 1.12 $al = "lib/$al";
382 root 1.1 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
383    
384     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
385     ($package = $1) =~ s/::/\//g;
386     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
387     # nop
388     } else {
389     warn "WARNING: $src: unparsable line, please report: $_";
390     }
391     }
392     }
393    
394     skip:
395     }
396     }
397    
398 root 1.16 if (exists $self->{extra_deps}{$_}) {
399     # we require it again, because many extra dependencies require the main module to be loaded
400     $self->add_eval ("require q\x00$_\x00");
401    
402     exists $lib->{$_} and $self->add_require ($_)
403     for @{ $self->{extra_deps}{$_} };
404     }
405    
406 root 1.1 \@info
407     };
408     }
409     }
410    
411     sub _trace {
412     my ($self, $file, $eval) = @_;
413    
414 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
415 root 1.1 }
416    
417     sub _trace_flush {
418     my ($self) = @_;
419    
420 root 1.3 # ->_add might add additional files to trace
421     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
422     my $tmpdir = newdir File::Temp;
423     my $dir = $tmpdir->dirname;
424    
425     open my $fh, ">:perlio", "$dir/eval"
426     or croak "$dir/eval: $!";
427     syswrite $fh,
428     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
429     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
430     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
431     . "CHECK {\n"
432     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
433     . 'print join "\x00", values %INC;'
434     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
435     . "}\n"
436     . (delete $self->{trace_check});
437     close $fh;
438    
439 root 1.4 system _perl_path, "-c", "$dir/eval"
440 root 1.3 and croak "trace failure, check trace process output - caught";
441 root 1.1
442 root 1.3 my @inc = split /\x00/, do {
443     open my $fh, "<:perlio", "$dir/out"
444     or croak "$dir/out: $!";
445     local $/;
446     scalar readline $fh
447     };
448 root 1.1
449 root 1.15 my $pfxmatch = $self->{pfxmatch};
450 root 1.1
451 root 1.3 # remove the library directory prefix, hope for the best
452 root 1.15 s/$pfxmatch//
453 root 1.3 or croak "$_: file outside any library directory"
454     for @inc;
455 root 1.1
456 root 1.3 $self->_add (\@inc);
457     }
458 root 1.1 }
459    
460     =item $extractor->add_mod ($module[, $module...])
461    
462     Adds the given module(s) to the file set - the module name must be specified
463     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
464    
465     The program will be loaded with the default import list, any dependent
466     files, such as the shared object implementing xs functions, or autoload
467     files, will also be added.
468    
469 root 1.5 If you want to use a different import list (for those rare modules wghere
470     import lists trigger different backend modules to be loaded for example),
471     you can use C<add_eval> instead:
472    
473     $extractor->add_eval ("use Module qw(a b c)");
474    
475 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
476     from the distribution they are part of.
477    
478     $extractor->add_mod ("Coro", "AnyEvent::AIO");
479    
480     =cut
481    
482     sub add_mod {
483     my $self = shift;
484    
485 root 1.3 for (@_) {
486     my $pkg = "libextractor" . ++$self->{count};
487     $self->_trace ("use $_", "{ package $pkg; use $_ }")
488     unless $self->{add_mod}{$_}++;
489     }
490 root 1.1 }
491    
492 root 1.16 =item $extractor->add_require ($name[, $name...])
493    
494     Works like C<add_mod>, but uses C<require $name> to load the module, i.e.
495     the name must be a filename.
496    
497     Example: load Coro and AnyEvent::AIO, but using C<add_require> instead of C<add_mod>.
498    
499     $extractor->add_require ("Coro.pm", "AnyEvent/AIO.pm");
500    
501     =cut
502    
503     sub add_require {
504     my $self = shift;
505    
506     for (@_) {
507     $self->add_eval ("require q\x00$_\x00")
508     unless $self->{add_require}{$_}++;
509     }
510     }
511    
512 root 1.12 =item $extractor->add_bin ($name[, $name...])
513 root 1.1
514     Adds the given (perl) program(s) to the file set, that is, a program
515     installed by some perl module, written in perl (an example would be the
516 root 1.23 L<perl-libextractor> program that is part of the C<Perl::LibExtractor>
517 root 1.1 distribution).
518    
519 root 1.5 Example: add the deliantra client program installed by the
520 root 1.12 L<Deliantra::Client> module and put it under F<bin/deliantra>.
521 root 1.1
522 root 1.12 $extractor->add_bin ("deliantra");
523 root 1.1
524     =cut
525    
526 root 1.12 sub add_bin {
527 root 1.1 my $self = shift;
528    
529     exe:
530     for my $exe (@_) {
531     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
532     if (open my $fh, "<:perlio", "$dir/$exe") {
533 root 1.12 if (-f $fh) {
534     my $file = do { local $/; readline $fh };
535 root 1.1
536 root 1.12 $self->_trace_flush if exists $self->{trace_check};
537     $self->{trace_check} = $file;
538 root 1.1
539 root 1.12 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
540     next exe;
541     }
542 root 1.1 }
543     }
544    
545 root 1.12 croak "add_bin ($exe): executable not found";
546 root 1.1 }
547     }
548    
549     =item $extractor->add_eval ($string)
550    
551 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
552     by it. For example, this would add L<AnyEvent> and the default backend
553     implementation module and event loop module:
554 root 1.1
555     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
556    
557 root 1.5 Each code snippet will be executed in its own package and under C<use
558     strict>.
559    
560 root 1.1 =cut
561    
562     sub add_eval {
563     my ($self, $eval) = @_;
564    
565 root 1.21 (my $file = substr $eval, 0, 64) =~ s/\015?\012/\\n/g;
566    
567 root 1.5 my $pkg = "libextractor" . ++$self->{count};
568 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
569 root 1.21 $self->_trace ($file,
570 root 1.3 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
571 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
572 root 1.3 );
573 root 1.1 }
574    
575 root 1.5 =back
576    
577     =head2 OTHER METHODS FOR ADDING FILES
578    
579     The following methods add commonly used files that are either not covered
580     by other methods or add commonly-used dependencies.
581    
582     =over 4
583    
584 root 1.1 =item $extractor->add_perl
585    
586     Adds the perl binary itself to the file set, including the libperl dll, if
587     needed.
588    
589 root 1.12 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
590     some F<dll/libperl.so.XXX>.
591 root 1.5
592 root 1.4 =cut
593    
594     sub add_perl {
595     my ($self) = @_;
596    
597 root 1.12 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
598 root 1.4
599     # on debian, we have the special case of a perl binary linked against
600     # a static libperl.a (which is not available), but the Config says to use
601     # a shared library, which is in the wrong directory, too (which breaks
602     # every other perl installation on the system - they are so stupid).
603    
604     # that means we can't find the libperl.so, because dbeian actively breaks
605     # their perl install, and we don't need it. we work around this by silently
606     # not including the libperl if we cannot find it.
607    
608     if ($Config{useshrplib} eq "true") {
609 root 1.8 my ($libperl, $libpath);
610    
611     if ($^O eq "cygwin") {
612     $libperl = $Config{libperl};
613     $libpath = "$Config{binexp}/$libperl";
614     } elsif ($^O eq "MSWin32") {
615     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
616     $libpath = "$Config{binexp}/$libperl";
617     } else {
618     $libperl = $Config{libperl};
619     $libpath = $self->{lib}{"CORE/$libperl"};
620 root 1.4 }
621 root 1.8
622 root 1.18 $self->{set}{"dll/$libperl"} = [$libpath]
623 root 1.8 if length $libpath && -e $libpath;
624 root 1.4 }
625     }
626 root 1.3
627     =item $extractor->add_core_support
628    
629     Try to add modules and files needed to support commonly-used builtin
630     language features. For example to open a scalar for I/O you need the
631     L<PerlIO::scalar> module:
632    
633     open $fh, "<", \$scalar
634    
635     A number of regex and string features (e.g. C<ucfirst>) need some unicore
636     files, e.g.:
637    
638     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
639    
640     This call adds these files (simply by executing code similar to the above
641     code fragments).
642    
643     Notable things that are missing are other PerlIO layers, such as
644     L<PerlIO::encoding>, and named character and character class matches.
645    
646     =cut
647    
648     sub add_core_support {
649     my ($self) = @_;
650    
651 root 1.21 $self->add_eval ('
652     # PerlIO::Scalar
653     my $v; open my $fh, "<", \$v;
654    
655     # various unicore regex/builtin gambits
656     my $x = chr 1234;
657     "\u$x\U$x\l$x\L$x";
658     $x =~ /$_$x?/i
659     for qw(\d \w \s \b \R \h \v);
660     split " ", $x; # usually covered by the regex above
661     ');
662    
663 root 1.10 $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
664 root 1.3 }
665 root 1.1
666 root 1.3 =item $extractor->add_unicore
667    
668 root 1.11 Adds (hopefully) all files from the unicore database that will ever be
669 root 1.5 needed.
670    
671     If you are not sure which unicode character classes and similar unicore
672     databases you need, and you do not care about an extra one thousand(!)
673     files comprising 4MB of data, then you can just call this method, which
674     adds basically all files from perl's unicode database.
675    
676 root 1.11 Note that C<add_core_support> also adds some unicore files, but it's not a
677     subset of C<add_unicore> - the former adds all files neccessary to support
678     core builtins (which includes some unicore files and other things), while
679     the latter adds all unicore files (but nothing else).
680    
681     When in doubt, use both.
682    
683 root 1.5 =cut
684    
685     sub add_unicore {
686     my ($self) = @_;
687    
688     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
689     }
690    
691 root 1.11 =item $extractor->add_core
692    
693     This adds all files from the perl core distribution, that is, all library
694     files that come with perl.
695    
696     This is a superset of C<add_core_support> and C<add_unicore>.
697    
698     This is quite a lot, but on the plus side, you can be sure nothing is
699     missing.
700    
701 root 1.13 This requires a full perl installation - Debian GNU/Linux doesn't package
702     the full perl library, so this function will not work there.
703 root 1.11
704     =cut
705    
706     sub add_core {
707     my ($self) = @_;
708    
709     my $lib = $self->{lib};
710    
711     for (@{
712 root 1.15 $self->_read_packlist (".packlist")
713 root 1.11 }) {
714     $self->{set}{$_} ||= [
715 root 1.12 "lib/"
716 root 1.11 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
717     ];
718     }
719     }
720    
721 root 1.5 =back
722    
723     =head2 GLOB-BASED ADDING AND FILTERING
724 root 1.1
725 root 1.5 These methods add or manipulate files by using glob-based patterns.
726    
727     These glob patterns work similarly to glob patterns in the shell:
728    
729     =over 4
730    
731     =item /
732    
733     A F</> at the start of the pattern interprets the pattern as a file
734     path inside the file set, almost the same as in the shell. For example,
735     F</bin/perl*> would match all files whose names starting with F<perl>
736     inside the F<bin> directory in the set.
737    
738     If the F</> is missing, then the pattern is interpreted as a module name
739 root 1.12 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
740     while F<Coro::*> would match F<lib/Coro/*.pm>.
741 root 1.5
742     =item *
743    
744     A single star matches anything inside a single directory component. For
745     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
746     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
747 root 1.1
748 root 1.5 Another way to look at it is that a single star matches anything but a
749     slash (F</>).
750 root 1.1
751 root 1.5 =item **
752 root 1.1
753 root 1.5 A double star matches any number of characters in the path, including F</>.
754 root 1.1
755 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
756     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
757 root 1.1
758 root 1.5 =back
759 root 1.1
760 root 1.2 =cut
761    
762     sub _extglob2re {
763 root 1.5 for (quotemeta $_[1]) {
764 root 1.2 s/\\\*\\\*/.*/g;
765     s/\\\*/[^\/]*/g;
766     s/\\\?/[^\/]/g;
767    
768     unless (s%^\\/%%) {
769     s%\\:\\:%/%g;
770 root 1.12 $_ = "lib/$_\\.pm";
771 root 1.2 }
772    
773     $_ .= '$';
774     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
775    
776     return qr<^$_>s
777     }
778     }
779    
780 root 1.5 =over 4
781    
782     =item $extractor->add_glob ($modglob[, $modglob...])
783    
784     Adds all files from the perl library that match the given glob pattern.
785    
786     For example, you could implement C<add_unicore> yourself like this:
787    
788     $extractor->add_glob ("/unicore/**.pl");
789    
790     =cut
791    
792     sub add_glob {
793     my $self = shift;
794    
795     for (@_) {
796     my $pat = $self->_extglob2re ($_);
797     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
798     }
799     }
800    
801     =item $extractor->filter ($pattern[, $pattern...])
802    
803     Applies a series of include/exclude filters. Each filter must start with
804     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
805     pattern. The rest of the pattern is a normal glob pattern.
806    
807     An exclude pattern (C<->) instantly removes all matching files from
808     the set. An include pattern (C<+>) protects matching files from later
809     removals.
810    
811     That is, if you have an include pattern then all files that were matched
812     by it will be included in the set, regardless of any further exclude
813     patterns matching the same files.
814    
815     Likewise, any file excluded by a pattern will not be included in the set,
816     even if matched by later include patterns.
817    
818     Any files not matched by any expression will simply stay in the set.
819    
820     For example, to remove most of the useless autoload functions by the POSIX
821     module (they either do the same thing as a builtin or always raise an
822 root 1.12 error), you would use this:
823 root 1.5
824     $extractor->filter ("-/lib/auto/POSIX/*.al");
825    
826     This does not remove all autoload files, only the ones not defined by a
827     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
828    
829     =cut
830    
831 root 1.2 sub filter {
832     my ($self, @patterns) = @_;
833    
834 root 1.3 $self->_trace_flush;
835    
836     my $set = $self->{set};
837     my %include;
838 root 1.2
839     for my $pat (@patterns) {
840     $pat =~ s/^([+\-])//
841     or croak "$_: not a valid filter pattern (missing + or - prefix)";
842     my $inc = $1 eq "+";
843     $pat = $self->_extglob2re ($pat);
844 root 1.3
845     my @match = grep /$pat/, keys %$set;
846    
847     if ($inc) {
848     @include{@match} = delete @$set{@match};
849     } else {
850     delete @$set{@{ $_->[I_DEP] }} # remove dependents
851     for delete @$set{@match};
852     }
853 root 1.2 }
854 root 1.3
855     my @include = keys %include;
856     @$set{@include} = delete @include{@include};
857 root 1.2 }
858 root 1.1
859 root 1.3 =item $extractor->runtime_only
860    
861     This removes all files that are not needed at runtime, such as static
862     archives, header and other files needed only for compilation of modules,
863     and pod and html files (which are unlikely to be needed at runtime).
864 root 1.1
865 root 1.8 This is quite useful when you want to have only files actually needed to
866 root 1.3 execute a program.
867    
868     =cut
869    
870     sub runtime_only {
871     my ($self) = @_;
872    
873     $self->_trace_flush;
874    
875     my $set = $self->{set};
876    
877 root 1.8 # delete all static libraries, also windows stuff
878 root 1.12 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
879 root 1.3
880     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
881 root 1.12 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
882 root 1.3
883     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
884 root 1.24 # also typemap
885     delete @$set{ grep m%^lib/.*\.(?:pod|h|html|typemap)$%s, keys %$set };
886 root 1.11
887     # delete unneeded unicore files
888 root 1.12 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
889 root 1.3 }
890 root 1.1
891     =back
892    
893 root 1.5 =head2 RESULT SET
894    
895     =over 4
896 root 1.1
897 root 1.5 =item $set = $extractor->set
898 root 1.1
899 root 1.5 Returns a hash reference that represents the result set. The hash is the
900     actual internal storage hash and can only be modified as described below.
901 root 1.1
902 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
903     e.g.:
904 root 1.1
905 root 1.5 bin/perl
906     lib/unicore/lib/Blk/Superscr.pl
907     lib/AnyEvent/Impl/EV.pm
908    
909     The value is an array reference with mostly unspecified contents, except
910     the first element, which is the file system path where the actual file can
911     be found.
912    
913     This code snippet lists all files inside the set:
914    
915     print "$_\n"
916     for sort keys %{ $extractor->set });
917    
918     This code fragment prints C<< filesystem_path => set_path >> pairs for all
919     files in the set:
920    
921     my $set = $extractor->set;
922     while (my ($set,$fspath) = each %$set) {
923     print "$fspath => $set\n";
924     }
925 root 1.1
926 root 1.5 You can implement your own filtering by asking for the result set with
927     C<< $extractor->set >>, and then deleting keys from the referenced hash
928     - since you can ask for the result set at any time you can add things,
929     filter them out this way, and add additional things.
930    
931     =back
932    
933     =cut
934    
935     sub set {
936 root 1.6 $_[0]->_trace_flush;
937 root 1.5 $_[0]{set}
938     }
939    
940     =head1 EXAMPLE
941    
942     To package he deliantra client (L<Deliantra::Client>), finding all
943     (perl) files needed to run it is a first step. This can be done by using
944     something like the following code snippet:
945    
946 root 1.12 my $ex = new Perl::LibExtractor;
947 root 1.5
948     $ex->add_perl;
949     $ex->add_core_support;
950 root 1.12 $ex->add_bin ("deliantra");
951 root 1.5 $ex->add_mod ("AnyEvent::Impl::EV");
952     $ex->add_mod ("AnyEvent::Impl::Perl");
953     $ex->add_mod ("Urlader");
954     $ex->filter ("-/*/auto/POSIX/**.al");
955     $ex->runtime_only;
956    
957     First it sets the perl library directory to F<pm> and F<.> (the latter
958     to work around some AutoLoader bugs), so perl uses only the perl library
959     files that came with the binary package.
960    
961     Then it sets some environment variable to override the system default
962     (which might be incompatible).
963    
964     Then it runs the client itself, using C<require>. Since C<require> only
965     looks in the perl library directory this is the reaosn why the scripts
966     were put there (of course, since F<.> is also included it doesn't matter,
967     but I refuse to yield to bugs).
968    
969     Finally it exits with a clean status to signal "ok" to Urlader.
970    
971     Back to the original C<Perl::LibExtractor> script: after initialising a
972     new set, the script simply adds the F<perl> interpreter and core support
973     files (just in case, not all are needed, but some are, and I am too lazy
974     to find out which ones exactly).
975    
976     Then it adds the deliantra executable itself, which in turn adds most of
977     the required modules. After that, the AnyEvent implementation modules are
978     added because these dependencies are not picked up automatically.
979    
980     The L<Urlader> module is added because the client itself does not depend
981     on it at all, but the wrapper does.
982    
983     At this point, all required files are present, and it's time to slim
984     down: most of the ueseless POSIX autoloaded functions are removed,
985     not because they are so big, but because creating files is a costly
986     operation in itself, so even small fiels have considerable overhead when
987     unpacking. Then files not required for running the client are removed.
988    
989     And that concludes it, the set is now ready.
990 root 1.1
991     =head1 SEE ALSO
992    
993 root 1.23 The utility program that comes with this module: L<perl-libextractor>.
994 root 1.1
995     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
996    
997 root 1.5 =head1 LICENSE
998    
999     This software package is licensed under the GPL version 3 or any later
1000     version, see COPYING for details.
1001    
1002     This license does not, of course, apply to any output generated by this
1003     software.
1004    
1005 root 1.1 =head1 AUTHOR
1006    
1007     Marc Lehmann <schmorp@schmorp.de>
1008     http://home.schmorp.de/
1009    
1010     =cut
1011    
1012     1;
1013