ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.23
Committed: Sat Feb 4 20:42:57 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.22: +3 -3 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27 root 1.23 The command-line utility F<perl-libextractor> can be a convenient
28 root 1.2 alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36 root 1.22 our $VERSION = '1.1';
37 root 1.1
38     use Config;
39 root 1.7 use File::Spec ();
40 root 1.1 use File::Temp ();
41    
42     use common::sense;
43    
44     sub I_SRC () { 0 }
45     sub I_DEP () { 1 }
46    
47     sub croak($) {
48     require Carp;
49     Carp::croak "(Perl::LibExtractor) $_[0]";
50     }
51    
52 root 1.8 my $canonpath = File::Spec->can ("canonpath");
53     my $case_tolerant = File::Spec->case_tolerant;
54 root 1.7
55     sub canonpath($) {
56     local $_ = $canonpath->(File::Spec::, $_[0]);
57     s%\\%/%g;
58 root 1.8 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 root 1.7 $_
60     }
61    
62 root 1.5 =head2 CREATION
63    
64     =over 4
65    
66 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
67    
68     Creates a new extractor object. Each extractor object stores some
69     configuration options and a subset of files that can be queried at any
70     time,.
71    
72 root 1.12 Binary executables (such as the perl interpreter) are stored inside
73     F<bin/>, perl scripts are stored under F<script/>, perl library files are
74     stored under F<lib/> and shared libraries are stored under F<dll/>.
75    
76 root 1.1 The following key-value pairs exist, with default values as specified.
77    
78     =over 4
79    
80     =item inc => \@INC without "."
81    
82     An arrayref with paths to perl library directories. The default is
83     C<\@INC>, with F<.> removed.
84    
85     To prepend custom dirs just do this:
86    
87     inc => ["mydir", @INC],
88    
89 root 1.3 =item use_packlist => 1
90 root 1.1
91 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 root 1.5 then each time a file is traced, the complete distribution that contains
93     it is included (but not traced).
94    
95     If disabled, only shared objects and autoload files will be added.
96 root 1.1
97 root 1.13 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98     this option will fail. Other perls should be fine.
99    
100 root 1.16 =item extra_deps => { file => [files...] }
101    
102     Some (mainly runtime dependencies in the perl core library) cannot be
103     detected automatically by this module, especially if you don't use
104     packlists and C<add_core>.
105    
106     This module comes with a set of default dependencies (such as L<Carp>
107     requiring L<Carp::Heavy>), which you cna override with this parameter.
108    
109     To see the default set of dependencies that come with this module, use
110     this:
111    
112     perl -MPerl::LibExtractor -MData::Dumper -e 'print Dumper $Perl::LibExtractor::EXTRA_DEPS'
113    
114 root 1.1 =back
115    
116     =cut
117    
118 root 1.16 our $EXTRA_DEPS = {
119 root 1.17 'bytes.pm' => ['bytes_heavy.pl'],
120     'utf8.pm' => ['utf8_heavy.pl'],
121     'Config.pm' => ['Config_heavy.pl', 'Config_git.pl'],
122     'Carp.pm' => ['Carp/Heavy.pm'],
123     'Exporter.pm' => ['Exporter/Heavy.pm'],
124 root 1.16 };
125    
126 root 1.1 sub new {
127     my ($class, %kv) = @_;
128    
129     my $self = bless {
130 root 1.16 inc => [grep $_ ne ".", @INC],
131 root 1.3 use_packlist => 1,
132 root 1.16 extra_deps => $EXTRA_DEPS,
133 root 1.1 %kv,
134 root 1.16 set => {},
135 root 1.1 }, $class;
136    
137     my %inc_seen;
138 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
139 root 1.1 $self->{inc} = \@inc;
140    
141 root 1.14 # maybe not inc, but these?
142     # sitearchexp
143     # sitelib
144     # vendorarchexp
145     # vendorlibexp
146     # archlibexp
147     # privlibexp
148    
149 root 1.1 $self->_set_inc;
150    
151     $self
152     }
153    
154 root 1.4 sub _perl_path() {
155     my $secure_perl_path = $Config{perlpath};
156    
157     if ($^O ne 'VMS') {
158     $secure_perl_path .= $Config{_exe}
159     unless $secure_perl_path =~ m/$Config{_exe}$/i;
160     }
161    
162     $secure_perl_path
163     }
164    
165 root 1.8 sub _path2match {
166     my $re = join "|", map "\Q$_", @_;
167    
168 root 1.19 $re = "^(?:$re)\\/";
169 root 1.8 $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
170    
171     $case_tolerant
172     ? qr<$re>i
173     : qr<$re>
174     }
175    
176 root 1.11 sub _read_packlist {
177 root 1.15 my ($self, $path) = @_;
178 root 1.11
179 root 1.15 my $pfxmatch = $self->{pfxmatch};
180     my $lib = $self->{lib};
181 root 1.11
182     my @packlist;
183    
184 root 1.15 open my $fh, "<:perlio", $path
185     or die "$path: $!";
186 root 1.11
187     while (<$fh>) {
188     chomp;
189     s/ .*$//; # newer-style .packlists might contain key=value pairs
190    
191 root 1.19 s%\\%/%g; # we only do unix-style paths internally
192    
193 root 1.15 s/$pfxmatch// and exists $lib->{$_}
194 root 1.11 or next;
195    
196     push @packlist, canonpath $_;
197     }
198    
199     \@packlist
200     }
201    
202 root 1.1 sub _set_inc {
203     my ($self) = @_;
204    
205 root 1.15 my $pfxmatch = _path2match @{ $self->{inc }};
206 root 1.2
207 root 1.1 my %lib;
208 root 1.2 my @packlists;
209 root 1.1
210     # find all files in all libdirs, earlier ones overwrite later ones
211 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
212    
213     while (@scan) {
214     my ($root, $dir) = @{ pop @scan };
215    
216     my $pfx = length $dir ? "$dir/" : "";
217    
218     for (do {
219     opendir my $fh, "$root/$dir"
220     or croak "$root/$dir: $!";
221     grep !/^\.\.?$/, readdir $fh
222     }) {
223     if (-d "$root/$dir/$_/.") {
224     $lib{"$pfx$_/"} = "$root/$pfx$_";
225     push @scan, [$root, "$pfx$_"];
226     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
227 root 1.15 push @packlists, "$root/$pfx.packlist";
228 root 1.2 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
229     # skip empty .bs files
230     # } elsif (/\.(?:pod|h|html)$/) {
231     # # not interested in those
232     } else {
233     #push @files, $_;
234     $lib{"$pfx$_"} = "$root/$pfx$_";
235 root 1.1 }
236 root 1.2 }
237    
238     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
239     }
240    
241 root 1.15 $self->{lib} = \%lib;
242     $self->{pfxmatch} = $pfxmatch;
243 root 1.14
244 root 1.2 my %packlist;
245 root 1.1
246 root 1.2 # need to go forward here
247     for (@packlists) {
248 root 1.15 my $packlist = $self->_read_packlist ($_);
249 root 1.2
250 root 1.11 $packlist{$_} = $packlist
251     for @$packlist;
252 root 1.1 }
253    
254 root 1.14 $self->{packlist} = \%packlist;
255 root 1.1 }
256    
257 root 1.5 =back
258    
259     =head2 TRACE/PACKLIST BASED ADDING
260    
261     The following methods add various things to the set of files.
262    
263     Each time a perl file is added, it is scanned by tracing either loading,
264     execution or compiling it, and seeing which other perl modules and
265     libraries have been loaded.
266    
267     For each library file found this way, additional dependencies are added:
268     if packlists are enabled, then all files of the distribution that contains
269     the file will be added. If packlists are disabled, then only shared
270     objects and autoload files for modules will be added.
271    
272     Only files from perl library directories will be added automatically. Any
273     other files (such as manpages or scripts installed in the F<bin>
274     directory) are skipped.
275    
276     If there is an error, such as a module not being found, then this module
277     croaks (as opposed to silently skipping). If you want to add something of
278     which you are not sure it exists, then you can wrap the call into C<eval
279     {}>. In some cases, you can avoid this by executing the code you want
280     to work later using C<add_eval> - see C<add_core_support> for an actual
281     example of this technique.
282    
283     Note that packlists are meant to add files not covered by other
284     mechanisms, such as resource files and other data files loaded directly by
285     a module - they are not meant to add dependencies that are missed because
286     they only happen at runtime.
287    
288     For example, with packlists, when using L<AnyEvent>, then all event loop
289     backends are automatically added as well, but I<not> any event loops
290     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
291     packlists, only the backend that is being used is added (i.e. normally
292     none, as loading AnyEvent does not instantly load any backend).
293    
294     To catch the extra event loop dependencies, you can either initialise
295     AnyEvent so it picks a suitable backend:
296    
297     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
298    
299     Or you can directly load the backend modules you plan to use:
300    
301     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
302    
303     An example of a program (or module) that has extra resource files is
304     L<Deliantra::Client> - the normal tracing (without packlist usage) will
305     correctly add all submodules, but miss the fonts and textures. By using
306     the packlist, those files are added correctly.
307    
308     =over 4
309    
310     =cut
311    
312 root 1.1 sub _add {
313     my ($self, $add) = @_;
314    
315     my $lib = $self->{lib};
316     my $path;
317    
318     for (@$add) {
319 root 1.12 $path = "lib/$_";
320 root 1.1
321     $self->{set}{$path} ||= do {
322     my @info;
323    
324     $info[I_SRC] = $lib->{$_}
325     or croak "$_: unable to locate file in perl library";
326    
327 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
328 root 1.12 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
329 root 1.2 for @{ $self->{packlist}{$_} };
330 root 1.3
331     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
332     # s/\.pm$//;
333     # s%/%::%g;
334     # my $pkg = "libextractor" . ++$self->{count};
335     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
336     # unless $self->{_add_do}{$_}++;
337     # }
338     #
339     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
340     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
341    
342 root 1.2 } elsif (/^(.*)\.pm$/) {
343 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
344     $auto =~ m%/([^/]+)/$% or die;
345     my $base = $1;
346    
347     if (exists $lib->{$auto}) {
348     # auto dir exists, scan it for cool stuff
349    
350     # 1. shared object, others are of no interest to us
351     my $so = "$auto$base.$Config{dlext}";
352     if (my $src = $lib->{$so}) {
353 root 1.12 $so = "lib/$so";
354 root 1.1 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
355     }
356    
357     # 2. autoloader/autosplit
358     my $ix = "${auto}autosplit.ix";
359     if (my $src = $lib->{$ix}) {
360 root 1.12 $ix = "lib/$ix";
361 root 1.1 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
362    
363     open my $fh, "<:perlio", $src
364     or croak "$src: $!";
365    
366     my $package;
367    
368     while (<$fh>) {
369     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
370     my $al = "auto/$package/$1.al";
371     my $src = $lib->{$al}
372     or croak "$al: autoload file not found, but should be there.";
373    
374 root 1.12 $al = "lib/$al";
375 root 1.1 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
376    
377     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
378     ($package = $1) =~ s/::/\//g;
379     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
380     # nop
381     } else {
382     warn "WARNING: $src: unparsable line, please report: $_";
383     }
384     }
385     }
386    
387     skip:
388     }
389     }
390    
391 root 1.16 if (exists $self->{extra_deps}{$_}) {
392     # we require it again, because many extra dependencies require the main module to be loaded
393     $self->add_eval ("require q\x00$_\x00");
394    
395     exists $lib->{$_} and $self->add_require ($_)
396     for @{ $self->{extra_deps}{$_} };
397     }
398    
399 root 1.1 \@info
400     };
401     }
402     }
403    
404     sub _trace {
405     my ($self, $file, $eval) = @_;
406    
407 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
408 root 1.1 }
409    
410     sub _trace_flush {
411     my ($self) = @_;
412    
413 root 1.3 # ->_add might add additional files to trace
414     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
415     my $tmpdir = newdir File::Temp;
416     my $dir = $tmpdir->dirname;
417    
418     open my $fh, ">:perlio", "$dir/eval"
419     or croak "$dir/eval: $!";
420     syswrite $fh,
421     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
422     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
423     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
424     . "CHECK {\n"
425     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
426     . 'print join "\x00", values %INC;'
427     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
428     . "}\n"
429     . (delete $self->{trace_check});
430     close $fh;
431    
432 root 1.4 system _perl_path, "-c", "$dir/eval"
433 root 1.3 and croak "trace failure, check trace process output - caught";
434 root 1.1
435 root 1.3 my @inc = split /\x00/, do {
436     open my $fh, "<:perlio", "$dir/out"
437     or croak "$dir/out: $!";
438     local $/;
439     scalar readline $fh
440     };
441 root 1.1
442 root 1.15 my $pfxmatch = $self->{pfxmatch};
443 root 1.1
444 root 1.3 # remove the library directory prefix, hope for the best
445 root 1.15 s/$pfxmatch//
446 root 1.3 or croak "$_: file outside any library directory"
447     for @inc;
448 root 1.1
449 root 1.3 $self->_add (\@inc);
450     }
451 root 1.1 }
452    
453     =item $extractor->add_mod ($module[, $module...])
454    
455     Adds the given module(s) to the file set - the module name must be specified
456     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
457    
458     The program will be loaded with the default import list, any dependent
459     files, such as the shared object implementing xs functions, or autoload
460     files, will also be added.
461    
462 root 1.5 If you want to use a different import list (for those rare modules wghere
463     import lists trigger different backend modules to be loaded for example),
464     you can use C<add_eval> instead:
465    
466     $extractor->add_eval ("use Module qw(a b c)");
467    
468 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
469     from the distribution they are part of.
470    
471     $extractor->add_mod ("Coro", "AnyEvent::AIO");
472    
473     =cut
474    
475     sub add_mod {
476     my $self = shift;
477    
478 root 1.3 for (@_) {
479     my $pkg = "libextractor" . ++$self->{count};
480     $self->_trace ("use $_", "{ package $pkg; use $_ }")
481     unless $self->{add_mod}{$_}++;
482     }
483 root 1.1 }
484    
485 root 1.16 =item $extractor->add_require ($name[, $name...])
486    
487     Works like C<add_mod>, but uses C<require $name> to load the module, i.e.
488     the name must be a filename.
489    
490     Example: load Coro and AnyEvent::AIO, but using C<add_require> instead of C<add_mod>.
491    
492     $extractor->add_require ("Coro.pm", "AnyEvent/AIO.pm");
493    
494     =cut
495    
496     sub add_require {
497     my $self = shift;
498    
499     for (@_) {
500     $self->add_eval ("require q\x00$_\x00")
501     unless $self->{add_require}{$_}++;
502     }
503     }
504    
505 root 1.12 =item $extractor->add_bin ($name[, $name...])
506 root 1.1
507     Adds the given (perl) program(s) to the file set, that is, a program
508     installed by some perl module, written in perl (an example would be the
509 root 1.23 L<perl-libextractor> program that is part of the C<Perl::LibExtractor>
510 root 1.1 distribution).
511    
512 root 1.5 Example: add the deliantra client program installed by the
513 root 1.12 L<Deliantra::Client> module and put it under F<bin/deliantra>.
514 root 1.1
515 root 1.12 $extractor->add_bin ("deliantra");
516 root 1.1
517     =cut
518    
519 root 1.12 sub add_bin {
520 root 1.1 my $self = shift;
521    
522     exe:
523     for my $exe (@_) {
524     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
525     if (open my $fh, "<:perlio", "$dir/$exe") {
526 root 1.12 if (-f $fh) {
527     my $file = do { local $/; readline $fh };
528 root 1.1
529 root 1.12 $self->_trace_flush if exists $self->{trace_check};
530     $self->{trace_check} = $file;
531 root 1.1
532 root 1.12 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
533     next exe;
534     }
535 root 1.1 }
536     }
537    
538 root 1.12 croak "add_bin ($exe): executable not found";
539 root 1.1 }
540     }
541    
542     =item $extractor->add_eval ($string)
543    
544 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
545     by it. For example, this would add L<AnyEvent> and the default backend
546     implementation module and event loop module:
547 root 1.1
548     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
549    
550 root 1.5 Each code snippet will be executed in its own package and under C<use
551     strict>.
552    
553 root 1.1 =cut
554    
555     sub add_eval {
556     my ($self, $eval) = @_;
557    
558 root 1.21 (my $file = substr $eval, 0, 64) =~ s/\015?\012/\\n/g;
559    
560 root 1.5 my $pkg = "libextractor" . ++$self->{count};
561 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
562 root 1.21 $self->_trace ($file,
563 root 1.3 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
564 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
565 root 1.3 );
566 root 1.1 }
567    
568 root 1.5 =back
569    
570     =head2 OTHER METHODS FOR ADDING FILES
571    
572     The following methods add commonly used files that are either not covered
573     by other methods or add commonly-used dependencies.
574    
575     =over 4
576    
577 root 1.1 =item $extractor->add_perl
578    
579     Adds the perl binary itself to the file set, including the libperl dll, if
580     needed.
581    
582 root 1.12 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
583     some F<dll/libperl.so.XXX>.
584 root 1.5
585 root 1.4 =cut
586    
587     sub add_perl {
588     my ($self) = @_;
589    
590 root 1.12 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
591 root 1.4
592     # on debian, we have the special case of a perl binary linked against
593     # a static libperl.a (which is not available), but the Config says to use
594     # a shared library, which is in the wrong directory, too (which breaks
595     # every other perl installation on the system - they are so stupid).
596    
597     # that means we can't find the libperl.so, because dbeian actively breaks
598     # their perl install, and we don't need it. we work around this by silently
599     # not including the libperl if we cannot find it.
600    
601     if ($Config{useshrplib} eq "true") {
602 root 1.8 my ($libperl, $libpath);
603    
604     if ($^O eq "cygwin") {
605     $libperl = $Config{libperl};
606     $libpath = "$Config{binexp}/$libperl";
607     } elsif ($^O eq "MSWin32") {
608     ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
609     $libpath = "$Config{binexp}/$libperl";
610     } else {
611     $libperl = $Config{libperl};
612     $libpath = $self->{lib}{"CORE/$libperl"};
613 root 1.4 }
614 root 1.8
615 root 1.18 $self->{set}{"dll/$libperl"} = [$libpath]
616 root 1.8 if length $libpath && -e $libpath;
617 root 1.4 }
618     }
619 root 1.3
620     =item $extractor->add_core_support
621    
622     Try to add modules and files needed to support commonly-used builtin
623     language features. For example to open a scalar for I/O you need the
624     L<PerlIO::scalar> module:
625    
626     open $fh, "<", \$scalar
627    
628     A number of regex and string features (e.g. C<ucfirst>) need some unicore
629     files, e.g.:
630    
631     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
632    
633     This call adds these files (simply by executing code similar to the above
634     code fragments).
635    
636     Notable things that are missing are other PerlIO layers, such as
637     L<PerlIO::encoding>, and named character and character class matches.
638    
639     =cut
640    
641     sub add_core_support {
642     my ($self) = @_;
643    
644 root 1.21 $self->add_eval ('
645     # PerlIO::Scalar
646     my $v; open my $fh, "<", \$v;
647    
648     # various unicore regex/builtin gambits
649     my $x = chr 1234;
650     "\u$x\U$x\l$x\L$x";
651     $x =~ /$_$x?/i
652     for qw(\d \w \s \b \R \h \v);
653     split " ", $x; # usually covered by the regex above
654     ');
655    
656 root 1.10 $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
657 root 1.3 }
658 root 1.1
659 root 1.3 =item $extractor->add_unicore
660    
661 root 1.11 Adds (hopefully) all files from the unicore database that will ever be
662 root 1.5 needed.
663    
664     If you are not sure which unicode character classes and similar unicore
665     databases you need, and you do not care about an extra one thousand(!)
666     files comprising 4MB of data, then you can just call this method, which
667     adds basically all files from perl's unicode database.
668    
669 root 1.11 Note that C<add_core_support> also adds some unicore files, but it's not a
670     subset of C<add_unicore> - the former adds all files neccessary to support
671     core builtins (which includes some unicore files and other things), while
672     the latter adds all unicore files (but nothing else).
673    
674     When in doubt, use both.
675    
676 root 1.5 =cut
677    
678     sub add_unicore {
679     my ($self) = @_;
680    
681     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
682     }
683    
684 root 1.11 =item $extractor->add_core
685    
686     This adds all files from the perl core distribution, that is, all library
687     files that come with perl.
688    
689     This is a superset of C<add_core_support> and C<add_unicore>.
690    
691     This is quite a lot, but on the plus side, you can be sure nothing is
692     missing.
693    
694 root 1.13 This requires a full perl installation - Debian GNU/Linux doesn't package
695     the full perl library, so this function will not work there.
696 root 1.11
697     =cut
698    
699     sub add_core {
700     my ($self) = @_;
701    
702     my $lib = $self->{lib};
703    
704     for (@{
705 root 1.15 $self->_read_packlist (".packlist")
706 root 1.11 }) {
707     $self->{set}{$_} ||= [
708 root 1.12 "lib/"
709 root 1.11 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
710     ];
711     }
712     }
713    
714 root 1.5 =back
715    
716     =head2 GLOB-BASED ADDING AND FILTERING
717 root 1.1
718 root 1.5 These methods add or manipulate files by using glob-based patterns.
719    
720     These glob patterns work similarly to glob patterns in the shell:
721    
722     =over 4
723    
724     =item /
725    
726     A F</> at the start of the pattern interprets the pattern as a file
727     path inside the file set, almost the same as in the shell. For example,
728     F</bin/perl*> would match all files whose names starting with F<perl>
729     inside the F<bin> directory in the set.
730    
731     If the F</> is missing, then the pattern is interpreted as a module name
732 root 1.12 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
733     while F<Coro::*> would match F<lib/Coro/*.pm>.
734 root 1.5
735     =item *
736    
737     A single star matches anything inside a single directory component. For
738     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
739     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
740 root 1.1
741 root 1.5 Another way to look at it is that a single star matches anything but a
742     slash (F</>).
743 root 1.1
744 root 1.5 =item **
745 root 1.1
746 root 1.5 A double star matches any number of characters in the path, including F</>.
747 root 1.1
748 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
749     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
750 root 1.1
751 root 1.5 =back
752 root 1.1
753 root 1.2 =cut
754    
755     sub _extglob2re {
756 root 1.5 for (quotemeta $_[1]) {
757 root 1.2 s/\\\*\\\*/.*/g;
758     s/\\\*/[^\/]*/g;
759     s/\\\?/[^\/]/g;
760    
761     unless (s%^\\/%%) {
762     s%\\:\\:%/%g;
763 root 1.12 $_ = "lib/$_\\.pm";
764 root 1.2 }
765    
766     $_ .= '$';
767     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
768    
769     return qr<^$_>s
770     }
771     }
772    
773 root 1.5 =over 4
774    
775     =item $extractor->add_glob ($modglob[, $modglob...])
776    
777     Adds all files from the perl library that match the given glob pattern.
778    
779     For example, you could implement C<add_unicore> yourself like this:
780    
781     $extractor->add_glob ("/unicore/**.pl");
782    
783     =cut
784    
785     sub add_glob {
786     my $self = shift;
787    
788     for (@_) {
789     my $pat = $self->_extglob2re ($_);
790     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
791     }
792     }
793    
794     =item $extractor->filter ($pattern[, $pattern...])
795    
796     Applies a series of include/exclude filters. Each filter must start with
797     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
798     pattern. The rest of the pattern is a normal glob pattern.
799    
800     An exclude pattern (C<->) instantly removes all matching files from
801     the set. An include pattern (C<+>) protects matching files from later
802     removals.
803    
804     That is, if you have an include pattern then all files that were matched
805     by it will be included in the set, regardless of any further exclude
806     patterns matching the same files.
807    
808     Likewise, any file excluded by a pattern will not be included in the set,
809     even if matched by later include patterns.
810    
811     Any files not matched by any expression will simply stay in the set.
812    
813     For example, to remove most of the useless autoload functions by the POSIX
814     module (they either do the same thing as a builtin or always raise an
815 root 1.12 error), you would use this:
816 root 1.5
817     $extractor->filter ("-/lib/auto/POSIX/*.al");
818    
819     This does not remove all autoload files, only the ones not defined by a
820     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
821    
822     =cut
823    
824 root 1.2 sub filter {
825     my ($self, @patterns) = @_;
826    
827 root 1.3 $self->_trace_flush;
828    
829     my $set = $self->{set};
830     my %include;
831 root 1.2
832     for my $pat (@patterns) {
833     $pat =~ s/^([+\-])//
834     or croak "$_: not a valid filter pattern (missing + or - prefix)";
835     my $inc = $1 eq "+";
836     $pat = $self->_extglob2re ($pat);
837 root 1.3
838     my @match = grep /$pat/, keys %$set;
839    
840     if ($inc) {
841     @include{@match} = delete @$set{@match};
842     } else {
843     delete @$set{@{ $_->[I_DEP] }} # remove dependents
844     for delete @$set{@match};
845     }
846 root 1.2 }
847 root 1.3
848     my @include = keys %include;
849     @$set{@include} = delete @include{@include};
850 root 1.2 }
851 root 1.1
852 root 1.3 =item $extractor->runtime_only
853    
854     This removes all files that are not needed at runtime, such as static
855     archives, header and other files needed only for compilation of modules,
856     and pod and html files (which are unlikely to be needed at runtime).
857 root 1.1
858 root 1.8 This is quite useful when you want to have only files actually needed to
859 root 1.3 execute a program.
860    
861     =cut
862    
863     sub runtime_only {
864     my ($self) = @_;
865    
866     $self->_trace_flush;
867    
868     my $set = $self->{set};
869    
870 root 1.8 # delete all static libraries, also windows stuff
871 root 1.12 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
872 root 1.3
873     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
874 root 1.12 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
875 root 1.3
876     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
877 root 1.12 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
878 root 1.11
879     # delete unneeded unicore files
880 root 1.12 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
881 root 1.3 }
882 root 1.1
883     =back
884    
885 root 1.5 =head2 RESULT SET
886    
887     =over 4
888 root 1.1
889 root 1.5 =item $set = $extractor->set
890 root 1.1
891 root 1.5 Returns a hash reference that represents the result set. The hash is the
892     actual internal storage hash and can only be modified as described below.
893 root 1.1
894 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
895     e.g.:
896 root 1.1
897 root 1.5 bin/perl
898     lib/unicore/lib/Blk/Superscr.pl
899     lib/AnyEvent/Impl/EV.pm
900    
901     The value is an array reference with mostly unspecified contents, except
902     the first element, which is the file system path where the actual file can
903     be found.
904    
905     This code snippet lists all files inside the set:
906    
907     print "$_\n"
908     for sort keys %{ $extractor->set });
909    
910     This code fragment prints C<< filesystem_path => set_path >> pairs for all
911     files in the set:
912    
913     my $set = $extractor->set;
914     while (my ($set,$fspath) = each %$set) {
915     print "$fspath => $set\n";
916     }
917 root 1.1
918 root 1.5 You can implement your own filtering by asking for the result set with
919     C<< $extractor->set >>, and then deleting keys from the referenced hash
920     - since you can ask for the result set at any time you can add things,
921     filter them out this way, and add additional things.
922    
923     =back
924    
925     =cut
926    
927     sub set {
928 root 1.6 $_[0]->_trace_flush;
929 root 1.5 $_[0]{set}
930     }
931    
932     =head1 EXAMPLE
933    
934     To package he deliantra client (L<Deliantra::Client>), finding all
935     (perl) files needed to run it is a first step. This can be done by using
936     something like the following code snippet:
937    
938 root 1.12 my $ex = new Perl::LibExtractor;
939 root 1.5
940     $ex->add_perl;
941     $ex->add_core_support;
942 root 1.12 $ex->add_bin ("deliantra");
943 root 1.5 $ex->add_mod ("AnyEvent::Impl::EV");
944     $ex->add_mod ("AnyEvent::Impl::Perl");
945     $ex->add_mod ("Urlader");
946     $ex->filter ("-/*/auto/POSIX/**.al");
947     $ex->runtime_only;
948    
949     First it sets the perl library directory to F<pm> and F<.> (the latter
950     to work around some AutoLoader bugs), so perl uses only the perl library
951     files that came with the binary package.
952    
953     Then it sets some environment variable to override the system default
954     (which might be incompatible).
955    
956     Then it runs the client itself, using C<require>. Since C<require> only
957     looks in the perl library directory this is the reaosn why the scripts
958     were put there (of course, since F<.> is also included it doesn't matter,
959     but I refuse to yield to bugs).
960    
961     Finally it exits with a clean status to signal "ok" to Urlader.
962    
963     Back to the original C<Perl::LibExtractor> script: after initialising a
964     new set, the script simply adds the F<perl> interpreter and core support
965     files (just in case, not all are needed, but some are, and I am too lazy
966     to find out which ones exactly).
967    
968     Then it adds the deliantra executable itself, which in turn adds most of
969     the required modules. After that, the AnyEvent implementation modules are
970     added because these dependencies are not picked up automatically.
971    
972     The L<Urlader> module is added because the client itself does not depend
973     on it at all, but the wrapper does.
974    
975     At this point, all required files are present, and it's time to slim
976     down: most of the ueseless POSIX autoloaded functions are removed,
977     not because they are so big, but because creating files is a costly
978     operation in itself, so even small fiels have considerable overhead when
979     unpacking. Then files not required for running the client are removed.
980    
981     And that concludes it, the set is now ready.
982 root 1.1
983     =head1 SEE ALSO
984    
985 root 1.23 The utility program that comes with this module: L<perl-libextractor>.
986 root 1.1
987     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
988    
989 root 1.5 =head1 LICENSE
990    
991     This software package is licensed under the GPL version 3 or any later
992     version, see COPYING for details.
993    
994     This license does not, of course, apply to any output generated by this
995     software.
996    
997 root 1.1 =head1 AUTHOR
998    
999     Marc Lehmann <schmorp@schmorp.de>
1000     http://home.schmorp.de/
1001    
1002     =cut
1003    
1004     1;
1005