ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.6
Committed: Sun Jan 15 14:14:15 2012 UTC (12 years, 6 months ago) by root
Branch: MAIN
Changes since 1.5: +1 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::LibExtractor - determine perl library subsets for building distributions
4    
5     =head1 SYNOPSIS
6    
7     use Perl::LibExtractor;
8    
9     =head1 DESCRIPTION
10    
11     The purpose of this module is to determine subsets of your perl library,
12     that is, a set of files needed to satisfy certain dependencies (e.g. of a
13     program).
14    
15     The goal is to extract a part of your perl installation including
16 root 1.2 dependencies. A typical use case for this module would be to find out
17 root 1.1 which files are needed to be build a L<PAR> distribution, to link into
18     an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19     stand-alone distributions tailormade to run your app.
20    
21     =head1 METHODS
22    
23     To use this module, first call the C<new>-constructor and then as many
24 root 1.5 other methods as you want, to generate a set of files. Then query the set
25     of files and do whatever you want with them.
26 root 1.2
27     The command-line utility F<perl-libextract> can be a convenient
28     alternative to using this module directly, and offers a few extra options,
29     such as to copy out the files into a new directory, strip them and/or
30     manipulate them in other ways.
31 root 1.1
32     =cut
33    
34     package Perl::LibExtractor;
35    
36     our $VERSION = '0.1';
37    
38     use Config;
39     use File::Temp ();
40    
41     use common::sense;
42    
43     sub I_SRC () { 0 }
44     sub I_DEP () { 1 }
45    
46     sub croak($) {
47     require Carp;
48     Carp::croak "(Perl::LibExtractor) $_[0]";
49     }
50    
51 root 1.5 =head2 CREATION
52    
53     =over 4
54    
55 root 1.1 =item $extractor = new Perl::LibExtractor [key => value...]
56    
57     Creates a new extractor object. Each extractor object stores some
58     configuration options and a subset of files that can be queried at any
59     time,.
60    
61     The following key-value pairs exist, with default values as specified.
62    
63     =over 4
64    
65     =item exedir => "bin"
66    
67 root 1.5 The prefix to use for the suggested target path for perl executables
68     (scripts). Defaults to F<bin>.
69 root 1.1
70     =item libdir => "lib"
71    
72     The prefix to use for the suggested target path of perl library
73     files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
74     etc.). Defaults to F<lib>.
75    
76     =item bindir => "bin"
77    
78     The prefix to use for the suggested target path for (non-perl)
79     executables. Defaults to F<bin>.
80    
81     =item dlldir => "bin"
82    
83     The prefix to use for the suggested target path of any shared
84     libraries. Defaults to F<bin>.
85    
86     =item inc => \@INC without "."
87    
88     An arrayref with paths to perl library directories. The default is
89     C<\@INC>, with F<.> removed.
90    
91     To prepend custom dirs just do this:
92    
93     inc => ["mydir", @INC],
94    
95 root 1.3 =item use_packlist => 1
96 root 1.1
97 root 1.3 Enable (if true) or disable the use of C<.packlist> files. If enabled,
98 root 1.5 then each time a file is traced, the complete distribution that contains
99     it is included (but not traced).
100    
101     If disabled, only shared objects and autoload files will be added.
102 root 1.1
103     =back
104    
105     =cut
106    
107     sub new {
108     my ($class, %kv) = @_;
109    
110     my $self = bless {
111     exedir => "bin",
112     libdir => "lib",
113     bindir => "bin",
114     dlldir => "bin",
115     inc => [grep $_ ne ".", @INC],
116 root 1.3 use_packlist => 1,
117 root 1.1 %kv,
118     set => {},
119     }, $class;
120    
121     my %inc_seen;
122 root 1.4 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
123 root 1.1 $self->{inc} = \@inc;
124    
125     $self->_set_inc;
126    
127     $self
128     }
129    
130 root 1.4 sub _perl_path() {
131     my $secure_perl_path = $Config{perlpath};
132    
133     if ($^O ne 'VMS') {
134     $secure_perl_path .= $Config{_exe}
135     unless $secure_perl_path =~ m/$Config{_exe}$/i;
136     }
137    
138     $secure_perl_path
139     }
140    
141 root 1.1 sub _set_inc {
142     my ($self) = @_;
143    
144 root 1.2 my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }};
145     $matchprefix = qr<^(?:$matchprefix)/>;
146    
147 root 1.1 my %lib;
148 root 1.2 my @packlists;
149 root 1.1
150     # find all files in all libdirs, earlier ones overwrite later ones
151 root 1.2 my @scan = map [$_, ""], @{ $self->{inc} };
152    
153     while (@scan) {
154     my ($root, $dir) = @{ pop @scan };
155    
156     my $pfx = length $dir ? "$dir/" : "";
157    
158     for (do {
159     opendir my $fh, "$root/$dir"
160     or croak "$root/$dir: $!";
161     grep !/^\.\.?$/, readdir $fh
162     }) {
163     if (-d "$root/$dir/$_/.") {
164     $lib{"$pfx$_/"} = "$root/$pfx$_";
165     push @scan, [$root, "$pfx$_"];
166     } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
167     push @packlists, [$root, $pfx];
168     } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
169     # skip empty .bs files
170     # } elsif (/\.(?:pod|h|html)$/) {
171     # # not interested in those
172     } else {
173     #push @files, $_;
174     $lib{"$pfx$_"} = "$root/$pfx$_";
175 root 1.1 }
176 root 1.2 }
177    
178     #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
179     }
180    
181     my %packlist;
182 root 1.1
183 root 1.2 # need to go forward here
184     for (@packlists) {
185     my ($root, $auto) = @$_;
186    
187     my @packlist;
188    
189     open my $fh, "<:perlio", "$root/$auto/.packlist"
190     or die "$root/$auto/.packlist: $!";
191    
192     $root = qr<^\Q$root/>;
193    
194     while (<$fh>) {
195     chomp;
196     s/ .*$//; # newer-style .packlists might contain key=value pairs
197     s%/\./%/%g; # yeah, these too
198    
199     s/$root// or next;
200     exists $lib{$_} or next;
201 root 1.1
202 root 1.2 push @packlist, $_;
203     $packlist{$_} = \@packlist;
204     }
205 root 1.1 }
206    
207     $self->{lib} = \%lib;
208 root 1.2 $self->{packlist} = \%packlist;
209     $self->{matchprefix} = $matchprefix;
210 root 1.1 }
211    
212 root 1.5 =back
213    
214     =head2 TRACE/PACKLIST BASED ADDING
215    
216     The following methods add various things to the set of files.
217    
218     Each time a perl file is added, it is scanned by tracing either loading,
219     execution or compiling it, and seeing which other perl modules and
220     libraries have been loaded.
221    
222     For each library file found this way, additional dependencies are added:
223     if packlists are enabled, then all files of the distribution that contains
224     the file will be added. If packlists are disabled, then only shared
225     objects and autoload files for modules will be added.
226    
227     Only files from perl library directories will be added automatically. Any
228     other files (such as manpages or scripts installed in the F<bin>
229     directory) are skipped.
230    
231     If there is an error, such as a module not being found, then this module
232     croaks (as opposed to silently skipping). If you want to add something of
233     which you are not sure it exists, then you can wrap the call into C<eval
234     {}>. In some cases, you can avoid this by executing the code you want
235     to work later using C<add_eval> - see C<add_core_support> for an actual
236     example of this technique.
237    
238     Note that packlists are meant to add files not covered by other
239     mechanisms, such as resource files and other data files loaded directly by
240     a module - they are not meant to add dependencies that are missed because
241     they only happen at runtime.
242    
243     For example, with packlists, when using L<AnyEvent>, then all event loop
244     backends are automatically added as well, but I<not> any event loops
245     (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
246     packlists, only the backend that is being used is added (i.e. normally
247     none, as loading AnyEvent does not instantly load any backend).
248    
249     To catch the extra event loop dependencies, you can either initialise
250     AnyEvent so it picks a suitable backend:
251    
252     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
253    
254     Or you can directly load the backend modules you plan to use:
255    
256     $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
257    
258     An example of a program (or module) that has extra resource files is
259     L<Deliantra::Client> - the normal tracing (without packlist usage) will
260     correctly add all submodules, but miss the fonts and textures. By using
261     the packlist, those files are added correctly.
262    
263     =over 4
264    
265     =cut
266    
267 root 1.1 sub _add {
268     my ($self, $add) = @_;
269    
270     my $lib = $self->{lib};
271     my $path;
272    
273     for (@$add) {
274     $path = "$self->{libdir}/$_";
275    
276     $self->{set}{$path} ||= do {
277     my @info;
278    
279     $info[I_SRC] = $lib->{$_}
280     or croak "$_: unable to locate file in perl library";
281    
282 root 1.3 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
283 root 1.2 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
284     for @{ $self->{packlist}{$_} };
285 root 1.3
286     # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
287     # s/\.pm$//;
288     # s%/%::%g;
289     # my $pkg = "libextractor" . ++$self->{count};
290     # $self->add_eval ("{ package $pkg; eval 'use $_' }")
291     # unless $self->{_add_do}{$_}++;
292     # }
293     #
294     # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
295     # for grep /\.pl$/, @{ $self->{packlist}{$_} };
296    
297 root 1.2 } elsif (/^(.*)\.pm$/) {
298 root 1.1 (my $auto = "auto/$1/") =~ s%::%/%g;
299     $auto =~ m%/([^/]+)/$% or die;
300     my $base = $1;
301    
302     if (exists $lib->{$auto}) {
303     # auto dir exists, scan it for cool stuff
304    
305     # 1. shared object, others are of no interest to us
306     my $so = "$auto$base.$Config{dlext}";
307     if (my $src = $lib->{$so}) {
308     $so = "$self->{libdir}/$so";
309     push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
310     }
311    
312     # 2. autoloader/autosplit
313     my $ix = "${auto}autosplit.ix";
314     if (my $src = $lib->{$ix}) {
315     $ix = "$self->{libdir}/$ix";
316     push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
317    
318     open my $fh, "<:perlio", $src
319     or croak "$src: $!";
320    
321     my $package;
322    
323     while (<$fh>) {
324     if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
325     my $al = "auto/$package/$1.al";
326     my $src = $lib->{$al}
327     or croak "$al: autoload file not found, but should be there.";
328    
329     $al = "$self->{libdir}/$al";
330     push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
331    
332     } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
333     ($package = $1) =~ s/::/\//g;
334     } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
335     # nop
336     } else {
337     warn "WARNING: $src: unparsable line, please report: $_";
338     }
339     }
340     }
341    
342     skip:
343     }
344     }
345    
346     \@info
347     };
348     }
349     }
350    
351     sub _trace {
352     my ($self, $file, $eval) = @_;
353    
354 root 1.3 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
355 root 1.1 }
356    
357     sub _trace_flush {
358     my ($self) = @_;
359    
360 root 1.3 # ->_add might add additional files to trace
361     while (exists $self->{trace_begin} or exists $self->{trace_check}) {
362     my $tmpdir = newdir File::Temp;
363     my $dir = $tmpdir->dirname;
364    
365     open my $fh, ">:perlio", "$dir/eval"
366     or croak "$dir/eval: $!";
367     syswrite $fh,
368     'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
369     . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
370     . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
371     . "CHECK {\n"
372     . 'open STDOUT, ">:raw", "out" or die "out: $!";'
373     . 'print join "\x00", values %INC;'
374     . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
375     . "}\n"
376     . (delete $self->{trace_check});
377     close $fh;
378    
379 root 1.4 system _perl_path, "-c", "$dir/eval"
380 root 1.3 and croak "trace failure, check trace process output - caught";
381 root 1.1
382 root 1.3 my @inc = split /\x00/, do {
383     open my $fh, "<:perlio", "$dir/out"
384     or croak "$dir/out: $!";
385     local $/;
386     scalar readline $fh
387     };
388 root 1.1
389 root 1.3 my $matchprefix = $self->{matchprefix};
390 root 1.1
391 root 1.3 # remove the library directory prefix, hope for the best
392     s/$matchprefix//
393     or croak "$_: file outside any library directory"
394     for @inc;
395 root 1.1
396 root 1.3 $self->_add (\@inc);
397     }
398 root 1.1 }
399    
400     =item $extractor->add_mod ($module[, $module...])
401    
402     Adds the given module(s) to the file set - the module name must be specified
403     as in C<use>, i.e. with C<::> as separators and without F<.pm>.
404    
405     The program will be loaded with the default import list, any dependent
406     files, such as the shared object implementing xs functions, or autoload
407     files, will also be added.
408    
409 root 1.5 If you want to use a different import list (for those rare modules wghere
410     import lists trigger different backend modules to be loaded for example),
411     you can use C<add_eval> instead:
412    
413     $extractor->add_eval ("use Module qw(a b c)");
414    
415 root 1.1 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
416     from the distribution they are part of.
417    
418     $extractor->add_mod ("Coro", "AnyEvent::AIO");
419    
420     =cut
421    
422     sub add_mod {
423     my $self = shift;
424    
425 root 1.3 for (@_) {
426     my $pkg = "libextractor" . ++$self->{count};
427     $self->_trace ("use $_", "{ package $pkg; use $_ }")
428     unless $self->{add_mod}{$_}++;
429     }
430 root 1.1 }
431    
432 root 1.5 =item $extractor->add_script ($name[, $name...])
433 root 1.1
434     Adds the given (perl) program(s) to the file set, that is, a program
435     installed by some perl module, written in perl (an example would be the
436     L<perl-libextract> program that is part of the C<Perl::LibExtractor>
437     distribution).
438    
439 root 1.5 Example: add the deliantra client program installed by the
440     L<Deliantra::Client> module.
441 root 1.1
442 root 1.5 $extractor->add_script ("deliantra");
443 root 1.1
444     =cut
445    
446 root 1.5 sub add_script {
447 root 1.1 my $self = shift;
448    
449     exe:
450     for my $exe (@_) {
451     for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
452     if (open my $fh, "<:perlio", "$dir/$exe") {
453    
454     my $file = do { local $/; readline $fh };
455    
456     $self->_trace_flush if exists $self->{trace_check};
457     $self->{trace_check} = $file;
458    
459     $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"];
460     next exe;
461     }
462     }
463    
464 root 1.5 croak "add_script ($exe): executable not found";
465 root 1.1 }
466     }
467    
468     =item $extractor->add_eval ($string)
469    
470 root 1.5 Evaluates the string as perl code and adds all modules that are loaded
471     by it. For example, this would add L<AnyEvent> and the default backend
472     implementation module and event loop module:
473 root 1.1
474     $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
475    
476 root 1.5 Each code snippet will be executed in its own package and under C<use
477     strict>.
478    
479 root 1.1 =cut
480    
481     sub add_eval {
482     my ($self, $eval) = @_;
483    
484 root 1.5 my $pkg = "libextractor" . ++$self->{count};
485 root 1.1 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
486 root 1.3 $self->_trace ($eval,
487     "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
488 root 1.5 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
489 root 1.3 );
490 root 1.1 }
491    
492 root 1.5 =back
493    
494     =head2 OTHER METHODS FOR ADDING FILES
495    
496     The following methods add commonly used files that are either not covered
497     by other methods or add commonly-used dependencies.
498    
499     =over 4
500    
501 root 1.1 =item $extractor->add_perl
502    
503     Adds the perl binary itself to the file set, including the libperl dll, if
504     needed.
505    
506 root 1.5 For example, on UNIX systems, this usually adds a F<bin/perl> and possibly
507     some F<lib/libperl.so.XXX>.
508    
509 root 1.4 =cut
510    
511     sub add_perl {
512     my ($self) = @_;
513    
514     $self->{set}{"$self->{bindir}/perl$Config{_exe}"} = [_perl_path];
515    
516     # on debian, we have the special case of a perl binary linked against
517     # a static libperl.a (which is not available), but the Config says to use
518     # a shared library, which is in the wrong directory, too (which breaks
519     # every other perl installation on the system - they are so stupid).
520    
521     # that means we can't find the libperl.so, because dbeian actively breaks
522     # their perl install, and we don't need it. we work around this by silently
523     # not including the libperl if we cannot find it.
524    
525     if ($Config{useshrplib} eq "true") {
526     if (my $libperl = $self->{lib}{"CORE/$Config{libperl}"}) {
527     $self->{set}{"$self->{dlldir}/$Config{libperl}"} = $libperl;
528     }
529     }
530     }
531 root 1.3
532     =item $extractor->add_core_support
533    
534     Try to add modules and files needed to support commonly-used builtin
535     language features. For example to open a scalar for I/O you need the
536     L<PerlIO::scalar> module:
537    
538     open $fh, "<", \$scalar
539    
540     A number of regex and string features (e.g. C<ucfirst>) need some unicore
541     files, e.g.:
542    
543     'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
544    
545     This call adds these files (simply by executing code similar to the above
546     code fragments).
547    
548     Notable things that are missing are other PerlIO layers, such as
549     L<PerlIO::encoding>, and named character and character class matches.
550    
551     =cut
552    
553     sub add_core_support {
554     my ($self) = @_;
555    
556     $self->add_eval ('my $v; open my $fh, "<", \$v');
557     $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
558     }
559 root 1.1
560 root 1.3 =item $extractor->add_unicore
561    
562 root 1.5 Adds (hopefully) all files form the unicore database that will ever be
563     needed.
564    
565     If you are not sure which unicode character classes and similar unicore
566     databases you need, and you do not care about an extra one thousand(!)
567     files comprising 4MB of data, then you can just call this method, which
568     adds basically all files from perl's unicode database.
569    
570     =cut
571    
572     sub add_unicore {
573     my ($self) = @_;
574    
575     $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
576     }
577    
578     =back
579    
580     =head2 GLOB-BASED ADDING AND FILTERING
581 root 1.1
582 root 1.5 These methods add or manipulate files by using glob-based patterns.
583    
584     These glob patterns work similarly to glob patterns in the shell:
585    
586     =over 4
587    
588     =item /
589    
590     A F</> at the start of the pattern interprets the pattern as a file
591     path inside the file set, almost the same as in the shell. For example,
592     F</bin/perl*> would match all files whose names starting with F<perl>
593     inside the F<bin> directory in the set.
594    
595     If the F</> is missing, then the pattern is interpreted as a module name
596     (a F<.pm> file). For example, F<Coro> matches the file F<libdir/Coro.pm>
597     (where F<libdir> is the perl library directory), while F<Coro::*> would
598     match F<libdir/Coro/*.pm>.
599    
600     =item *
601    
602     A single star matches anything inside a single directory component. For
603     example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
604     F<lib/Coro/> directory, but not any files deeper in the hierarchy.
605 root 1.1
606 root 1.5 Another way to look at it is that a single star matches anything but a
607     slash (F</>).
608 root 1.1
609 root 1.5 =item **
610 root 1.1
611 root 1.5 A double star matches any number of characters in the path, including F</>.
612 root 1.1
613 root 1.5 For example, F<AnyEvent::**> would match all modules whose names start
614     with C<AnyEvent::>, no matter how deep in the hierarchy they are.
615 root 1.1
616 root 1.5 =back
617 root 1.1
618 root 1.2 =cut
619    
620     sub _extglob2re {
621 root 1.5 for (quotemeta $_[1]) {
622 root 1.2 s/\\\*\\\*/.*/g;
623     s/\\\*/[^\/]*/g;
624     s/\\\?/[^\/]/g;
625    
626     unless (s%^\\/%%) {
627     s%\\:\\:%/%g;
628 root 1.5 $_ = (quotemeta $_[0]{libdir}) . "/$_\\.pm";
629 root 1.2 }
630    
631     $_ .= '$';
632     s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
633    
634     return qr<^$_>s
635     }
636     }
637    
638 root 1.5 =over 4
639    
640     =item $extractor->add_glob ($modglob[, $modglob...])
641    
642     Adds all files from the perl library that match the given glob pattern.
643    
644     For example, you could implement C<add_unicore> yourself like this:
645    
646     $extractor->add_glob ("/unicore/**.pl");
647    
648     =cut
649    
650     sub add_glob {
651     my $self = shift;
652    
653     for (@_) {
654     my $pat = $self->_extglob2re ($_);
655     $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
656     }
657     }
658    
659     =item $extractor->filter ($pattern[, $pattern...])
660    
661     Applies a series of include/exclude filters. Each filter must start with
662     either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
663     pattern. The rest of the pattern is a normal glob pattern.
664    
665     An exclude pattern (C<->) instantly removes all matching files from
666     the set. An include pattern (C<+>) protects matching files from later
667     removals.
668    
669     That is, if you have an include pattern then all files that were matched
670     by it will be included in the set, regardless of any further exclude
671     patterns matching the same files.
672    
673     Likewise, any file excluded by a pattern will not be included in the set,
674     even if matched by later include patterns.
675    
676     Any files not matched by any expression will simply stay in the set.
677    
678     For example, to remove most of the useless autoload functions by the POSIX
679     module (they either do the same thing as a builtin or always raise an
680     error), you would use this (assuming a default C<libdir>):
681    
682     $extractor->filter ("-/lib/auto/POSIX/*.al");
683    
684     This does not remove all autoload files, only the ones not defined by a
685     subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
686    
687     =cut
688    
689 root 1.2 sub filter {
690     my ($self, @patterns) = @_;
691    
692 root 1.3 $self->_trace_flush;
693    
694     my $set = $self->{set};
695     my %include;
696 root 1.2
697     for my $pat (@patterns) {
698     $pat =~ s/^([+\-])//
699     or croak "$_: not a valid filter pattern (missing + or - prefix)";
700     my $inc = $1 eq "+";
701     $pat = $self->_extglob2re ($pat);
702 root 1.3
703     my @match = grep /$pat/, keys %$set;
704    
705     if ($inc) {
706     @include{@match} = delete @$set{@match};
707     } else {
708     delete @$set{@{ $_->[I_DEP] }} # remove dependents
709     for delete @$set{@match};
710     }
711 root 1.2 }
712 root 1.3
713     my @include = keys %include;
714     @$set{@include} = delete @include{@include};
715 root 1.2 }
716 root 1.1
717 root 1.3 =item $extractor->runtime_only
718    
719     This removes all files that are not needed at runtime, such as static
720     archives, header and other files needed only for compilation of modules,
721     and pod and html files (which are unlikely to be needed at runtime).
722 root 1.1
723 root 1.3 This is quite useful when you want to have only fiels actually needed to
724     execute a program.
725    
726     =cut
727    
728     sub runtime_only {
729     my ($self) = @_;
730    
731     $self->_trace_flush;
732    
733     my $set = $self->{set};
734    
735     # delete all static libraries
736     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set };
737    
738     # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
739     delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
740    
741     # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
742     delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set };
743     }
744 root 1.1
745     =back
746    
747 root 1.5 =head2 RESULT SET
748    
749     =over 4
750 root 1.1
751 root 1.5 =item $set = $extractor->set
752 root 1.1
753 root 1.5 Returns a hash reference that represents the result set. The hash is the
754     actual internal storage hash and can only be modified as described below.
755 root 1.1
756 root 1.5 Each key in the hash is the path inside the set, without a leading slash,
757     e.g.:
758 root 1.1
759 root 1.5 bin/perl
760     lib/unicore/lib/Blk/Superscr.pl
761     lib/AnyEvent/Impl/EV.pm
762    
763     The value is an array reference with mostly unspecified contents, except
764     the first element, which is the file system path where the actual file can
765     be found.
766    
767     This code snippet lists all files inside the set:
768    
769     print "$_\n"
770     for sort keys %{ $extractor->set });
771    
772     This code fragment prints C<< filesystem_path => set_path >> pairs for all
773     files in the set:
774    
775     my $set = $extractor->set;
776     while (my ($set,$fspath) = each %$set) {
777     print "$fspath => $set\n";
778     }
779 root 1.1
780 root 1.5 You can implement your own filtering by asking for the result set with
781     C<< $extractor->set >>, and then deleting keys from the referenced hash
782     - since you can ask for the result set at any time you can add things,
783     filter them out this way, and add additional things.
784    
785     =back
786    
787     =cut
788    
789     sub set {
790 root 1.6 $_[0]->_trace_flush;
791 root 1.5 $_[0]{set}
792     }
793    
794     =head1 EXAMPLE
795    
796     To package he deliantra client (L<Deliantra::Client>), finding all
797     (perl) files needed to run it is a first step. This can be done by using
798     something like the following code snippet:
799    
800     my $ex = new Perl::LibExtractor
801     exedir => ".", dlldir => ".",
802     libdir => "pm", bindir => "pm/bin";
803    
804     $ex->add_perl;
805     $ex->add_core_support;
806     $ex->add_script ("deliantra");
807     $ex->add_mod ("AnyEvent::Impl::EV");
808     $ex->add_mod ("AnyEvent::Impl::Perl");
809     $ex->add_mod ("Urlader");
810     $ex->filter ("-/*/auto/POSIX/**.al");
811     $ex->runtime_only;
812    
813     Let's first find out about the choice of paths for the subset. The
814     Deliantra client binary packages use L<Urlader> nowadays, and there it is
815     convenient to have F<perl> and any shared libraries directly in the root
816     of the distribution.
817    
818     The perl library files are put into a directory named F<pm>, simply
819     because it's shorter than F<lib>, and in the future, some files might go
820     into F<lib>.
821    
822     And finally, the F<deliantra> script itself is put into the perl library
823     directory, because it is not run directly - the installed client uses the
824     system fonts and other resources, while the binary package is supposed
825     to use the files packaged with it. To achieve this, a wrapper script is
826     created, called F<run>; which displays a splash screen and configures the
827     environment. A simplified version of it could look like this:
828    
829     @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
830     $ENV{PANGO_RC_FILE} = "pango.rc";
831     require "bin/deliantra";
832     exit 0;
833    
834     First it sets the perl library directory to F<pm> and F<.> (the latter
835     to work around some AutoLoader bugs), so perl uses only the perl library
836     files that came with the binary package.
837    
838     Then it sets some environment variable to override the system default
839     (which might be incompatible).
840    
841     Then it runs the client itself, using C<require>. Since C<require> only
842     looks in the perl library directory this is the reaosn why the scripts
843     were put there (of course, since F<.> is also included it doesn't matter,
844     but I refuse to yield to bugs).
845    
846     Finally it exits with a clean status to signal "ok" to Urlader.
847    
848     Back to the original C<Perl::LibExtractor> script: after initialising a
849     new set, the script simply adds the F<perl> interpreter and core support
850     files (just in case, not all are needed, but some are, and I am too lazy
851     to find out which ones exactly).
852    
853     Then it adds the deliantra executable itself, which in turn adds most of
854     the required modules. After that, the AnyEvent implementation modules are
855     added because these dependencies are not picked up automatically.
856    
857     The L<Urlader> module is added because the client itself does not depend
858     on it at all, but the wrapper does.
859    
860     At this point, all required files are present, and it's time to slim
861     down: most of the ueseless POSIX autoloaded functions are removed,
862     not because they are so big, but because creating files is a costly
863     operation in itself, so even small fiels have considerable overhead when
864     unpacking. Then files not required for running the client are removed.
865    
866     And that concludes it, the set is now ready.
867 root 1.1
868     =head1 SEE ALSO
869    
870     The utility program that comes with this module: L<perl-libextract>.
871    
872     L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
873    
874 root 1.5 =head1 LICENSE
875    
876     This software package is licensed under the GPL version 3 or any later
877     version, see COPYING for details.
878    
879     This license does not, of course, apply to any output generated by this
880     software.
881    
882 root 1.1 =head1 AUTHOR
883    
884     Marc Lehmann <schmorp@schmorp.de>
885     http://home.schmorp.de/
886    
887     =cut
888    
889     1;
890