ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.11
Committed: Tue Jan 17 21:41:41 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.10: +69 -20 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set
25 of files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =cut
33
34 package Perl::LibExtractor;
35
36 our $VERSION = '0.1';
37
38 use Config;
39 use File::Spec ();
40 use File::Temp ();
41
42 use common::sense;
43
44 sub I_SRC () { 0 }
45 sub I_DEP () { 1 }
46
47 sub croak($) {
48 require Carp;
49 Carp::croak "(Perl::LibExtractor) $_[0]";
50 }
51
52 my $canonpath = File::Spec->can ("canonpath");
53 my $case_tolerant = File::Spec->case_tolerant;
54
55 sub canonpath($) {
56 local $_ = $canonpath->(File::Spec::, $_[0]);
57 s%\\%/%g;
58 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 $_
60 }
61
62 =head2 CREATION
63
64 =over 4
65
66 =item $extractor = new Perl::LibExtractor [key => value...]
67
68 Creates a new extractor object. Each extractor object stores some
69 configuration options and a subset of files that can be queried at any
70 time,.
71
72 The following key-value pairs exist, with default values as specified.
73
74 =over 4
75
76 =item exedir => "bin"
77
78 The prefix to use for the suggested target path for perl executables
79 (scripts). Defaults to F<bin>.
80
81 =item libdir => "lib"
82
83 The prefix to use for the suggested target path of perl library
84 files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files
85 etc.). Defaults to F<lib>.
86
87 =item bindir => "bin"
88
89 The prefix to use for the suggested target path for (non-perl)
90 executables. Defaults to F<bin>.
91
92 =item dlldir => "bin"
93
94 The prefix to use for the suggested target path of any shared
95 libraries. Defaults to F<bin>.
96
97 =item inc => \@INC without "."
98
99 An arrayref with paths to perl library directories. The default is
100 C<\@INC>, with F<.> removed.
101
102 To prepend custom dirs just do this:
103
104 inc => ["mydir", @INC],
105
106 =item use_packlist => 1
107
108 Enable (if true) or disable the use of C<.packlist> files. If enabled,
109 then each time a file is traced, the complete distribution that contains
110 it is included (but not traced).
111
112 If disabled, only shared objects and autoload files will be added.
113
114 =back
115
116 =cut
117
118 sub new {
119 my ($class, %kv) = @_;
120
121 my $self = bless {
122 exedir => "bin",
123 libdir => "lib",
124 bindir => "bin",
125 dlldir => "bin",
126 inc => [grep $_ ne ".", @INC],
127 use_packlist => 1,
128 %kv,
129 set => {},
130 }, $class;
131
132 my %inc_seen;
133 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
134 $self->{inc} = \@inc;
135
136 $self->_set_inc;
137
138 $self
139 }
140
141 sub _perl_path() {
142 my $secure_perl_path = $Config{perlpath};
143
144 if ($^O ne 'VMS') {
145 $secure_perl_path .= $Config{_exe}
146 unless $secure_perl_path =~ m/$Config{_exe}$/i;
147 }
148
149 $secure_perl_path
150 }
151
152 sub _path2match {
153 my $re = join "|", map "\Q$_", @_;
154
155 $re = "(?:$re)\\/";
156 $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
157
158 $case_tolerant
159 ? qr<$re>i
160 : qr<$re>
161 }
162
163 sub _read_packlist {
164 my ($self, $root, $path) = @_;
165
166 my $lib = $self->{lib};
167
168 my @packlist;
169
170 open my $fh, "<:perlio", "$root/$path"
171 or die "$root/$path: $!";
172
173 $root = _path2match $root;
174
175 while (<$fh>) {
176 chomp;
177 s/ .*$//; # newer-style .packlists might contain key=value pairs
178
179 s/$root// and exists $lib->{$_}
180 or next;
181
182 push @packlist, canonpath $_;
183 }
184
185 \@packlist
186 }
187
188 sub _set_inc {
189 my ($self) = @_;
190
191 my $matchprefix = _path2match @{ $self->{inc }};
192
193 my %lib;
194 my @packlists;
195
196 # find all files in all libdirs, earlier ones overwrite later ones
197 my @scan = map [$_, ""], @{ $self->{inc} };
198
199 while (@scan) {
200 my ($root, $dir) = @{ pop @scan };
201
202 my $pfx = length $dir ? "$dir/" : "";
203
204 for (do {
205 opendir my $fh, "$root/$dir"
206 or croak "$root/$dir: $!";
207 grep !/^\.\.?$/, readdir $fh
208 }) {
209 if (-d "$root/$dir/$_/.") {
210 $lib{"$pfx$_/"} = "$root/$pfx$_";
211 push @scan, [$root, "$pfx$_"];
212 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
213 push @packlists, [$root, $pfx];
214 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
215 # skip empty .bs files
216 # } elsif (/\.(?:pod|h|html)$/) {
217 # # not interested in those
218 } else {
219 #push @files, $_;
220 $lib{"$pfx$_"} = "$root/$pfx$_";
221 }
222 }
223
224 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
225 }
226
227 my %packlist;
228
229 # need to go forward here
230 for (@packlists) {
231 my $packlist = $self->_read_packlist ($_->[0], "$_->[1]/.packlist");
232
233 $packlist{$_} = $packlist
234 for @$packlist;
235 }
236
237 $self->{lib} = \%lib;
238 $self->{packlist} = \%packlist;
239 $self->{matchprefix} = $matchprefix;
240 }
241
242 =back
243
244 =head2 TRACE/PACKLIST BASED ADDING
245
246 The following methods add various things to the set of files.
247
248 Each time a perl file is added, it is scanned by tracing either loading,
249 execution or compiling it, and seeing which other perl modules and
250 libraries have been loaded.
251
252 For each library file found this way, additional dependencies are added:
253 if packlists are enabled, then all files of the distribution that contains
254 the file will be added. If packlists are disabled, then only shared
255 objects and autoload files for modules will be added.
256
257 Only files from perl library directories will be added automatically. Any
258 other files (such as manpages or scripts installed in the F<bin>
259 directory) are skipped.
260
261 If there is an error, such as a module not being found, then this module
262 croaks (as opposed to silently skipping). If you want to add something of
263 which you are not sure it exists, then you can wrap the call into C<eval
264 {}>. In some cases, you can avoid this by executing the code you want
265 to work later using C<add_eval> - see C<add_core_support> for an actual
266 example of this technique.
267
268 Note that packlists are meant to add files not covered by other
269 mechanisms, such as resource files and other data files loaded directly by
270 a module - they are not meant to add dependencies that are missed because
271 they only happen at runtime.
272
273 For example, with packlists, when using L<AnyEvent>, then all event loop
274 backends are automatically added as well, but I<not> any event loops
275 (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
276 packlists, only the backend that is being used is added (i.e. normally
277 none, as loading AnyEvent does not instantly load any backend).
278
279 To catch the extra event loop dependencies, you can either initialise
280 AnyEvent so it picks a suitable backend:
281
282 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
283
284 Or you can directly load the backend modules you plan to use:
285
286 $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
287
288 An example of a program (or module) that has extra resource files is
289 L<Deliantra::Client> - the normal tracing (without packlist usage) will
290 correctly add all submodules, but miss the fonts and textures. By using
291 the packlist, those files are added correctly.
292
293 =over 4
294
295 =cut
296
297 sub _add {
298 my ($self, $add) = @_;
299
300 my $lib = $self->{lib};
301 my $path;
302
303 for (@$add) {
304 $path = "$self->{libdir}/$_";
305
306 $self->{set}{$path} ||= do {
307 my @info;
308
309 $info[I_SRC] = $lib->{$_}
310 or croak "$_: unable to locate file in perl library";
311
312 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
313 $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die]
314 for @{ $self->{packlist}{$_} };
315
316 # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
317 # s/\.pm$//;
318 # s%/%::%g;
319 # my $pkg = "libextractor" . ++$self->{count};
320 # $self->add_eval ("{ package $pkg; eval 'use $_' }")
321 # unless $self->{_add_do}{$_}++;
322 # }
323 #
324 # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
325 # for grep /\.pl$/, @{ $self->{packlist}{$_} };
326
327 } elsif (/^(.*)\.pm$/) {
328 (my $auto = "auto/$1/") =~ s%::%/%g;
329 $auto =~ m%/([^/]+)/$% or die;
330 my $base = $1;
331
332 if (exists $lib->{$auto}) {
333 # auto dir exists, scan it for cool stuff
334
335 # 1. shared object, others are of no interest to us
336 my $so = "$auto$base.$Config{dlext}";
337 if (my $src = $lib->{$so}) {
338 $so = "$self->{libdir}/$so";
339 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
340 }
341
342 # 2. autoloader/autosplit
343 my $ix = "${auto}autosplit.ix";
344 if (my $src = $lib->{$ix}) {
345 $ix = "$self->{libdir}/$ix";
346 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
347
348 open my $fh, "<:perlio", $src
349 or croak "$src: $!";
350
351 my $package;
352
353 while (<$fh>) {
354 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
355 my $al = "auto/$package/$1.al";
356 my $src = $lib->{$al}
357 or croak "$al: autoload file not found, but should be there.";
358
359 $al = "$self->{libdir}/$al";
360 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
361
362 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
363 ($package = $1) =~ s/::/\//g;
364 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
365 # nop
366 } else {
367 warn "WARNING: $src: unparsable line, please report: $_";
368 }
369 }
370 }
371
372 skip:
373 }
374 }
375
376 \@info
377 };
378 }
379 }
380
381 sub _trace {
382 my ($self, $file, $eval) = @_;
383
384 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
385 }
386
387 sub _trace_flush {
388 my ($self) = @_;
389
390 # ->_add might add additional files to trace
391 while (exists $self->{trace_begin} or exists $self->{trace_check}) {
392 my $tmpdir = newdir File::Temp;
393 my $dir = $tmpdir->dirname;
394
395 open my $fh, ">:perlio", "$dir/eval"
396 or croak "$dir/eval: $!";
397 syswrite $fh,
398 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
399 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
400 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
401 . "CHECK {\n"
402 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
403 . 'print join "\x00", values %INC;'
404 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
405 . "}\n"
406 . (delete $self->{trace_check});
407 close $fh;
408
409 system _perl_path, "-c", "$dir/eval"
410 and croak "trace failure, check trace process output - caught";
411
412 my @inc = split /\x00/, do {
413 open my $fh, "<:perlio", "$dir/out"
414 or croak "$dir/out: $!";
415 local $/;
416 scalar readline $fh
417 };
418
419 my $matchprefix = $self->{matchprefix};
420
421 # remove the library directory prefix, hope for the best
422 s/$matchprefix//
423 or croak "$_: file outside any library directory"
424 for @inc;
425
426 $self->_add (\@inc);
427 }
428 }
429
430 =item $extractor->add_mod ($module[, $module...])
431
432 Adds the given module(s) to the file set - the module name must be specified
433 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
434
435 The program will be loaded with the default import list, any dependent
436 files, such as the shared object implementing xs functions, or autoload
437 files, will also be added.
438
439 If you want to use a different import list (for those rare modules wghere
440 import lists trigger different backend modules to be loaded for example),
441 you can use C<add_eval> instead:
442
443 $extractor->add_eval ("use Module qw(a b c)");
444
445 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
446 from the distribution they are part of.
447
448 $extractor->add_mod ("Coro", "AnyEvent::AIO");
449
450 =cut
451
452 sub add_mod {
453 my $self = shift;
454
455 for (@_) {
456 my $pkg = "libextractor" . ++$self->{count};
457 $self->_trace ("use $_", "{ package $pkg; use $_ }")
458 unless $self->{add_mod}{$_}++;
459 }
460 }
461
462 =item $extractor->add_script ($name[, $name...])
463
464 Adds the given (perl) program(s) to the file set, that is, a program
465 installed by some perl module, written in perl (an example would be the
466 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
467 distribution).
468
469 Example: add the deliantra client program installed by the
470 L<Deliantra::Client> module.
471
472 $extractor->add_script ("deliantra");
473
474 =cut
475
476 sub add_script {
477 my $self = shift;
478
479 exe:
480 for my $exe (@_) {
481 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
482 if (open my $fh, "<:perlio", "$dir/$exe") {
483
484 my $file = do { local $/; readline $fh };
485
486 $self->_trace_flush if exists $self->{trace_check};
487 $self->{trace_check} = $file;
488
489 $self->{set}{"$self->{bindir}/$exe"} = ["$dir/$exe"];
490 next exe;
491 }
492 }
493
494 croak "add_script ($exe): executable not found";
495 }
496 }
497
498 =item $extractor->add_eval ($string)
499
500 Evaluates the string as perl code and adds all modules that are loaded
501 by it. For example, this would add L<AnyEvent> and the default backend
502 implementation module and event loop module:
503
504 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
505
506 Each code snippet will be executed in its own package and under C<use
507 strict>.
508
509 =cut
510
511 sub add_eval {
512 my ($self, $eval) = @_;
513
514 my $pkg = "libextractor" . ++$self->{count};
515 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
516 $self->_trace ($eval,
517 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
518 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
519 );
520 }
521
522 =back
523
524 =head2 OTHER METHODS FOR ADDING FILES
525
526 The following methods add commonly used files that are either not covered
527 by other methods or add commonly-used dependencies.
528
529 =over 4
530
531 =item $extractor->add_perl
532
533 Adds the perl binary itself to the file set, including the libperl dll, if
534 needed.
535
536 For example, on UNIX systems, this usually adds a F<bin/perl> and possibly
537 some F<lib/libperl.so.XXX>.
538
539 =cut
540
541 sub add_perl {
542 my ($self) = @_;
543
544 $self->{set}{"$self->{exedir}/perl$Config{_exe}"} = [_perl_path];
545
546 # on debian, we have the special case of a perl binary linked against
547 # a static libperl.a (which is not available), but the Config says to use
548 # a shared library, which is in the wrong directory, too (which breaks
549 # every other perl installation on the system - they are so stupid).
550
551 # that means we can't find the libperl.so, because dbeian actively breaks
552 # their perl install, and we don't need it. we work around this by silently
553 # not including the libperl if we cannot find it.
554
555 if ($Config{useshrplib} eq "true") {
556 my ($libperl, $libpath);
557
558 if ($^O eq "cygwin") {
559 $libperl = $Config{libperl};
560 $libpath = "$Config{binexp}/$libperl";
561 } elsif ($^O eq "MSWin32") {
562 ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
563 $libpath = "$Config{binexp}/$libperl";
564 } else {
565 $libperl = $Config{libperl};
566 $libpath = $self->{lib}{"CORE/$libperl"};
567 }
568
569 $self->{set}{"$self->{dlldir}/$libperl"} = $libpath
570 if length $libpath && -e $libpath;
571 }
572 }
573
574 =item $extractor->add_core_support
575
576 Try to add modules and files needed to support commonly-used builtin
577 language features. For example to open a scalar for I/O you need the
578 L<PerlIO::scalar> module:
579
580 open $fh, "<", \$scalar
581
582 A number of regex and string features (e.g. C<ucfirst>) need some unicore
583 files, e.g.:
584
585 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
586
587 This call adds these files (simply by executing code similar to the above
588 code fragments).
589
590 Notable things that are missing are other PerlIO layers, such as
591 L<PerlIO::encoding>, and named character and character class matches.
592
593 =cut
594
595 sub add_core_support {
596 my ($self) = @_;
597
598 $self->add_eval ('my $v; open my $fh, "<", \$v');
599 $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
600 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
601 $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
602 }
603
604 =item $extractor->add_unicore
605
606 Adds (hopefully) all files from the unicore database that will ever be
607 needed.
608
609 If you are not sure which unicode character classes and similar unicore
610 databases you need, and you do not care about an extra one thousand(!)
611 files comprising 4MB of data, then you can just call this method, which
612 adds basically all files from perl's unicode database.
613
614 Note that C<add_core_support> also adds some unicore files, but it's not a
615 subset of C<add_unicore> - the former adds all files neccessary to support
616 core builtins (which includes some unicore files and other things), while
617 the latter adds all unicore files (but nothing else).
618
619 When in doubt, use both.
620
621 =cut
622
623 sub add_unicore {
624 my ($self) = @_;
625
626 $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
627 }
628
629 =item $extractor->add_core
630
631 This adds all files from the perl core distribution, that is, all library
632 files that come with perl.
633
634 This is a superset of C<add_core_support> and C<add_unicore>.
635
636 This is quite a lot, but on the plus side, you can be sure nothing is
637 missing.
638
639 This requires a full perl installation - many distributions (Debian
640 GNU/Linux for example) don't package all library files that perl installs.
641
642 =cut
643
644 sub add_core {
645 my ($self) = @_;
646
647 my $lib = $self->{lib};
648
649 for (@{
650 $self->_read_packlist ($Config{privlibexp}, ".packlist")
651 }) {
652 $self->{set}{$_} ||= [
653 "$self->{libdir}/"
654 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
655 ];
656 }
657 }
658
659 =back
660
661 =head2 GLOB-BASED ADDING AND FILTERING
662
663 These methods add or manipulate files by using glob-based patterns.
664
665 These glob patterns work similarly to glob patterns in the shell:
666
667 =over 4
668
669 =item /
670
671 A F</> at the start of the pattern interprets the pattern as a file
672 path inside the file set, almost the same as in the shell. For example,
673 F</bin/perl*> would match all files whose names starting with F<perl>
674 inside the F<bin> directory in the set.
675
676 If the F</> is missing, then the pattern is interpreted as a module name
677 (a F<.pm> file). For example, F<Coro> matches the file F<libdir/Coro.pm>
678 (where F<libdir> is the perl library directory), while F<Coro::*> would
679 match F<libdir/Coro/*.pm>.
680
681 =item *
682
683 A single star matches anything inside a single directory component. For
684 example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
685 F<lib/Coro/> directory, but not any files deeper in the hierarchy.
686
687 Another way to look at it is that a single star matches anything but a
688 slash (F</>).
689
690 =item **
691
692 A double star matches any number of characters in the path, including F</>.
693
694 For example, F<AnyEvent::**> would match all modules whose names start
695 with C<AnyEvent::>, no matter how deep in the hierarchy they are.
696
697 =back
698
699 =cut
700
701 sub _extglob2re {
702 for (quotemeta $_[1]) {
703 s/\\\*\\\*/.*/g;
704 s/\\\*/[^\/]*/g;
705 s/\\\?/[^\/]/g;
706
707 unless (s%^\\/%%) {
708 s%\\:\\:%/%g;
709 $_ = (quotemeta $_[0]{libdir}) . "/$_\\.pm";
710 }
711
712 $_ .= '$';
713 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
714
715 return qr<^$_>s
716 }
717 }
718
719 =over 4
720
721 =item $extractor->add_glob ($modglob[, $modglob...])
722
723 Adds all files from the perl library that match the given glob pattern.
724
725 For example, you could implement C<add_unicore> yourself like this:
726
727 $extractor->add_glob ("/unicore/**.pl");
728
729 =cut
730
731 sub add_glob {
732 my $self = shift;
733
734 for (@_) {
735 my $pat = $self->_extglob2re ($_);
736 $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
737 }
738 }
739
740 =item $extractor->filter ($pattern[, $pattern...])
741
742 Applies a series of include/exclude filters. Each filter must start with
743 either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
744 pattern. The rest of the pattern is a normal glob pattern.
745
746 An exclude pattern (C<->) instantly removes all matching files from
747 the set. An include pattern (C<+>) protects matching files from later
748 removals.
749
750 That is, if you have an include pattern then all files that were matched
751 by it will be included in the set, regardless of any further exclude
752 patterns matching the same files.
753
754 Likewise, any file excluded by a pattern will not be included in the set,
755 even if matched by later include patterns.
756
757 Any files not matched by any expression will simply stay in the set.
758
759 For example, to remove most of the useless autoload functions by the POSIX
760 module (they either do the same thing as a builtin or always raise an
761 error), you would use this (assuming a default C<libdir>):
762
763 $extractor->filter ("-/lib/auto/POSIX/*.al");
764
765 This does not remove all autoload files, only the ones not defined by a
766 subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
767
768 =cut
769
770 sub filter {
771 my ($self, @patterns) = @_;
772
773 $self->_trace_flush;
774
775 my $set = $self->{set};
776 my %include;
777
778 for my $pat (@patterns) {
779 $pat =~ s/^([+\-])//
780 or croak "$_: not a valid filter pattern (missing + or - prefix)";
781 my $inc = $1 eq "+";
782 $pat = $self->_extglob2re ($pat);
783
784 my @match = grep /$pat/, keys %$set;
785
786 if ($inc) {
787 @include{@match} = delete @$set{@match};
788 } else {
789 delete @$set{@{ $_->[I_DEP] }} # remove dependents
790 for delete @$set{@match};
791 }
792 }
793
794 my @include = keys %include;
795 @$set{@include} = delete @include{@include};
796 }
797
798 =item $extractor->runtime_only
799
800 This removes all files that are not needed at runtime, such as static
801 archives, header and other files needed only for compilation of modules,
802 and pod and html files (which are unlikely to be needed at runtime).
803
804 This is quite useful when you want to have only files actually needed to
805 execute a program.
806
807 =cut
808
809 sub runtime_only {
810 my ($self) = @_;
811
812 $self->_trace_flush;
813
814 my $set = $self->{set};
815
816 # delete all static libraries, also windows stuff
817 delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
818
819 # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
820 delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
821
822 # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
823 delete @$set{ grep m%^\Q$self->{libdir}\E/.*\.(?:pod|h|html)$%s, keys %$set };
824
825 # delete unneeded unicore files
826 delete @$set{ grep m%^\Q$self->{libdir}\E/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
827 }
828
829 =back
830
831 =head2 RESULT SET
832
833 =over 4
834
835 =item $set = $extractor->set
836
837 Returns a hash reference that represents the result set. The hash is the
838 actual internal storage hash and can only be modified as described below.
839
840 Each key in the hash is the path inside the set, without a leading slash,
841 e.g.:
842
843 bin/perl
844 lib/unicore/lib/Blk/Superscr.pl
845 lib/AnyEvent/Impl/EV.pm
846
847 The value is an array reference with mostly unspecified contents, except
848 the first element, which is the file system path where the actual file can
849 be found.
850
851 This code snippet lists all files inside the set:
852
853 print "$_\n"
854 for sort keys %{ $extractor->set });
855
856 This code fragment prints C<< filesystem_path => set_path >> pairs for all
857 files in the set:
858
859 my $set = $extractor->set;
860 while (my ($set,$fspath) = each %$set) {
861 print "$fspath => $set\n";
862 }
863
864 You can implement your own filtering by asking for the result set with
865 C<< $extractor->set >>, and then deleting keys from the referenced hash
866 - since you can ask for the result set at any time you can add things,
867 filter them out this way, and add additional things.
868
869 =back
870
871 =cut
872
873 sub set {
874 $_[0]->_trace_flush;
875 $_[0]{set}
876 }
877
878 =head1 EXAMPLE
879
880 To package he deliantra client (L<Deliantra::Client>), finding all
881 (perl) files needed to run it is a first step. This can be done by using
882 something like the following code snippet:
883
884 my $ex = new Perl::LibExtractor
885 exedir => ".", dlldir => ".",
886 libdir => "pm", bindir => "pm/bin";
887
888 $ex->add_perl;
889 $ex->add_core_support;
890 $ex->add_script ("deliantra");
891 $ex->add_mod ("AnyEvent::Impl::EV");
892 $ex->add_mod ("AnyEvent::Impl::Perl");
893 $ex->add_mod ("Urlader");
894 $ex->filter ("-/*/auto/POSIX/**.al");
895 $ex->runtime_only;
896
897 Let's first find out about the choice of paths for the subset. The
898 Deliantra client binary packages use L<Urlader> nowadays, and there it is
899 convenient to have F<perl> and any shared libraries directly in the root
900 of the distribution.
901
902 The perl library files are put into a directory named F<pm>, simply
903 because it's shorter than F<lib>, and in the future, some files might go
904 into F<lib>.
905
906 And finally, the F<deliantra> script itself is put into the perl library
907 directory, because it is not run directly - the installed client uses the
908 system fonts and other resources, while the binary package is supposed
909 to use the files packaged with it. To achieve this, a wrapper script is
910 created, called F<run>; which displays a splash screen and configures the
911 environment. A simplified version of it could look like this:
912
913 @INC = ("pm", "."); # "." required by newer AutoLoader grrrr.
914 $ENV{PANGO_RC_FILE} = "pango.rc";
915 require "bin/deliantra";
916 exit 0;
917
918 First it sets the perl library directory to F<pm> and F<.> (the latter
919 to work around some AutoLoader bugs), so perl uses only the perl library
920 files that came with the binary package.
921
922 Then it sets some environment variable to override the system default
923 (which might be incompatible).
924
925 Then it runs the client itself, using C<require>. Since C<require> only
926 looks in the perl library directory this is the reaosn why the scripts
927 were put there (of course, since F<.> is also included it doesn't matter,
928 but I refuse to yield to bugs).
929
930 Finally it exits with a clean status to signal "ok" to Urlader.
931
932 Back to the original C<Perl::LibExtractor> script: after initialising a
933 new set, the script simply adds the F<perl> interpreter and core support
934 files (just in case, not all are needed, but some are, and I am too lazy
935 to find out which ones exactly).
936
937 Then it adds the deliantra executable itself, which in turn adds most of
938 the required modules. After that, the AnyEvent implementation modules are
939 added because these dependencies are not picked up automatically.
940
941 The L<Urlader> module is added because the client itself does not depend
942 on it at all, but the wrapper does.
943
944 At this point, all required files are present, and it's time to slim
945 down: most of the ueseless POSIX autoloaded functions are removed,
946 not because they are so big, but because creating files is a costly
947 operation in itself, so even small fiels have considerable overhead when
948 unpacking. Then files not required for running the client are removed.
949
950 And that concludes it, the set is now ready.
951
952 =head1 SEE ALSO
953
954 The utility program that comes with this module: L<perl-libextract>.
955
956 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
957
958 =head1 LICENSE
959
960 This software package is licensed under the GPL version 3 or any later
961 version, see COPYING for details.
962
963 This license does not, of course, apply to any output generated by this
964 software.
965
966 =head1 AUTHOR
967
968 Marc Lehmann <schmorp@schmorp.de>
969 http://home.schmorp.de/
970
971 =cut
972
973 1;
974