ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.13
Committed: Fri Jan 20 20:07:59 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.12: +5 -2 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set
25 of files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =cut
33
34 package Perl::LibExtractor;
35
36 our $VERSION = '0.1';
37
38 use Config;
39 use File::Spec ();
40 use File::Temp ();
41
42 use common::sense;
43
44 sub I_SRC () { 0 }
45 sub I_DEP () { 1 }
46
47 sub croak($) {
48 require Carp;
49 Carp::croak "(Perl::LibExtractor) $_[0]";
50 }
51
52 my $canonpath = File::Spec->can ("canonpath");
53 my $case_tolerant = File::Spec->case_tolerant;
54
55 sub canonpath($) {
56 local $_ = $canonpath->(File::Spec::, $_[0]);
57 s%\\%/%g;
58 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 $_
60 }
61
62 =head2 CREATION
63
64 =over 4
65
66 =item $extractor = new Perl::LibExtractor [key => value...]
67
68 Creates a new extractor object. Each extractor object stores some
69 configuration options and a subset of files that can be queried at any
70 time,.
71
72 Binary executables (such as the perl interpreter) are stored inside
73 F<bin/>, perl scripts are stored under F<script/>, perl library files are
74 stored under F<lib/> and shared libraries are stored under F<dll/>.
75
76 The following key-value pairs exist, with default values as specified.
77
78 =over 4
79
80 =item inc => \@INC without "."
81
82 An arrayref with paths to perl library directories. The default is
83 C<\@INC>, with F<.> removed.
84
85 To prepend custom dirs just do this:
86
87 inc => ["mydir", @INC],
88
89 =item use_packlist => 1
90
91 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 then each time a file is traced, the complete distribution that contains
93 it is included (but not traced).
94
95 If disabled, only shared objects and autoload files will be added.
96
97 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98 this option will fail. Other perls should be fine.
99
100 =back
101
102 =cut
103
104 sub new {
105 my ($class, %kv) = @_;
106
107 my $self = bless {
108 inc => [grep $_ ne ".", @INC],
109 use_packlist => 1,
110 %kv,
111 set => {},
112 }, $class;
113
114 my %inc_seen;
115 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
116 $self->{inc} = \@inc;
117
118 $self->_set_inc;
119
120 $self
121 }
122
123 sub _perl_path() {
124 my $secure_perl_path = $Config{perlpath};
125
126 if ($^O ne 'VMS') {
127 $secure_perl_path .= $Config{_exe}
128 unless $secure_perl_path =~ m/$Config{_exe}$/i;
129 }
130
131 $secure_perl_path
132 }
133
134 sub _path2match {
135 my $re = join "|", map "\Q$_", @_;
136
137 $re = "(?:$re)\\/";
138 $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
139
140 $case_tolerant
141 ? qr<$re>i
142 : qr<$re>
143 }
144
145 sub _read_packlist {
146 my ($self, $root, $path) = @_;
147
148 my $lib = $self->{lib};
149
150 my @packlist;
151
152 open my $fh, "<:perlio", "$root/$path"
153 or die "$root/$path: $!";
154
155 $root = _path2match $root;
156
157 while (<$fh>) {
158 chomp;
159 s/ .*$//; # newer-style .packlists might contain key=value pairs
160
161 s/$root// and exists $lib->{$_}
162 or next;
163
164 push @packlist, canonpath $_;
165 }
166
167 \@packlist
168 }
169
170 sub _set_inc {
171 my ($self) = @_;
172
173 my $matchprefix = _path2match @{ $self->{inc }};
174
175 my %lib;
176 my @packlists;
177
178 # find all files in all libdirs, earlier ones overwrite later ones
179 my @scan = map [$_, ""], @{ $self->{inc} };
180
181 while (@scan) {
182 my ($root, $dir) = @{ pop @scan };
183
184 my $pfx = length $dir ? "$dir/" : "";
185
186 for (do {
187 opendir my $fh, "$root/$dir"
188 or croak "$root/$dir: $!";
189 grep !/^\.\.?$/, readdir $fh
190 }) {
191 if (-d "$root/$dir/$_/.") {
192 $lib{"$pfx$_/"} = "$root/$pfx$_";
193 push @scan, [$root, "$pfx$_"];
194 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
195 push @packlists, [$root, $pfx];
196 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
197 # skip empty .bs files
198 # } elsif (/\.(?:pod|h|html)$/) {
199 # # not interested in those
200 } else {
201 #push @files, $_;
202 $lib{"$pfx$_"} = "$root/$pfx$_";
203 }
204 }
205
206 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
207 }
208
209 my %packlist;
210
211 # need to go forward here
212 for (@packlists) {
213 my $packlist = $self->_read_packlist ($_->[0], "$_->[1]/.packlist");
214
215 $packlist{$_} = $packlist
216 for @$packlist;
217 }
218
219 $self->{lib} = \%lib;
220 $self->{packlist} = \%packlist;
221 $self->{matchprefix} = $matchprefix;
222 }
223
224 =back
225
226 =head2 TRACE/PACKLIST BASED ADDING
227
228 The following methods add various things to the set of files.
229
230 Each time a perl file is added, it is scanned by tracing either loading,
231 execution or compiling it, and seeing which other perl modules and
232 libraries have been loaded.
233
234 For each library file found this way, additional dependencies are added:
235 if packlists are enabled, then all files of the distribution that contains
236 the file will be added. If packlists are disabled, then only shared
237 objects and autoload files for modules will be added.
238
239 Only files from perl library directories will be added automatically. Any
240 other files (such as manpages or scripts installed in the F<bin>
241 directory) are skipped.
242
243 If there is an error, such as a module not being found, then this module
244 croaks (as opposed to silently skipping). If you want to add something of
245 which you are not sure it exists, then you can wrap the call into C<eval
246 {}>. In some cases, you can avoid this by executing the code you want
247 to work later using C<add_eval> - see C<add_core_support> for an actual
248 example of this technique.
249
250 Note that packlists are meant to add files not covered by other
251 mechanisms, such as resource files and other data files loaded directly by
252 a module - they are not meant to add dependencies that are missed because
253 they only happen at runtime.
254
255 For example, with packlists, when using L<AnyEvent>, then all event loop
256 backends are automatically added as well, but I<not> any event loops
257 (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
258 packlists, only the backend that is being used is added (i.e. normally
259 none, as loading AnyEvent does not instantly load any backend).
260
261 To catch the extra event loop dependencies, you can either initialise
262 AnyEvent so it picks a suitable backend:
263
264 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
265
266 Or you can directly load the backend modules you plan to use:
267
268 $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
269
270 An example of a program (or module) that has extra resource files is
271 L<Deliantra::Client> - the normal tracing (without packlist usage) will
272 correctly add all submodules, but miss the fonts and textures. By using
273 the packlist, those files are added correctly.
274
275 =over 4
276
277 =cut
278
279 sub _add {
280 my ($self, $add) = @_;
281
282 my $lib = $self->{lib};
283 my $path;
284
285 for (@$add) {
286 $path = "lib/$_";
287
288 $self->{set}{$path} ||= do {
289 my @info;
290
291 $info[I_SRC] = $lib->{$_}
292 or croak "$_: unable to locate file in perl library";
293
294 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
295 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
296 for @{ $self->{packlist}{$_} };
297
298 # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
299 # s/\.pm$//;
300 # s%/%::%g;
301 # my $pkg = "libextractor" . ++$self->{count};
302 # $self->add_eval ("{ package $pkg; eval 'use $_' }")
303 # unless $self->{_add_do}{$_}++;
304 # }
305 #
306 # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
307 # for grep /\.pl$/, @{ $self->{packlist}{$_} };
308
309 } elsif (/^(.*)\.pm$/) {
310 (my $auto = "auto/$1/") =~ s%::%/%g;
311 $auto =~ m%/([^/]+)/$% or die;
312 my $base = $1;
313
314 if (exists $lib->{$auto}) {
315 # auto dir exists, scan it for cool stuff
316
317 # 1. shared object, others are of no interest to us
318 my $so = "$auto$base.$Config{dlext}";
319 if (my $src = $lib->{$so}) {
320 $so = "lib/$so";
321 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
322 }
323
324 # 2. autoloader/autosplit
325 my $ix = "${auto}autosplit.ix";
326 if (my $src = $lib->{$ix}) {
327 $ix = "lib/$ix";
328 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
329
330 open my $fh, "<:perlio", $src
331 or croak "$src: $!";
332
333 my $package;
334
335 while (<$fh>) {
336 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
337 my $al = "auto/$package/$1.al";
338 my $src = $lib->{$al}
339 or croak "$al: autoload file not found, but should be there.";
340
341 $al = "lib/$al";
342 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
343
344 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
345 ($package = $1) =~ s/::/\//g;
346 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
347 # nop
348 } else {
349 warn "WARNING: $src: unparsable line, please report: $_";
350 }
351 }
352 }
353
354 skip:
355 }
356 }
357
358 \@info
359 };
360 }
361 }
362
363 sub _trace {
364 my ($self, $file, $eval) = @_;
365
366 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
367 }
368
369 sub _trace_flush {
370 my ($self) = @_;
371
372 # ->_add might add additional files to trace
373 while (exists $self->{trace_begin} or exists $self->{trace_check}) {
374 my $tmpdir = newdir File::Temp;
375 my $dir = $tmpdir->dirname;
376
377 open my $fh, ">:perlio", "$dir/eval"
378 or croak "$dir/eval: $!";
379 syswrite $fh,
380 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
381 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
382 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
383 . "CHECK {\n"
384 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
385 . 'print join "\x00", values %INC;'
386 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
387 . "}\n"
388 . (delete $self->{trace_check});
389 close $fh;
390
391 system _perl_path, "-c", "$dir/eval"
392 and croak "trace failure, check trace process output - caught";
393
394 my @inc = split /\x00/, do {
395 open my $fh, "<:perlio", "$dir/out"
396 or croak "$dir/out: $!";
397 local $/;
398 scalar readline $fh
399 };
400
401 my $matchprefix = $self->{matchprefix};
402
403 # remove the library directory prefix, hope for the best
404 s/$matchprefix//
405 or croak "$_: file outside any library directory"
406 for @inc;
407
408 $self->_add (\@inc);
409 }
410 }
411
412 =item $extractor->add_mod ($module[, $module...])
413
414 Adds the given module(s) to the file set - the module name must be specified
415 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
416
417 The program will be loaded with the default import list, any dependent
418 files, such as the shared object implementing xs functions, or autoload
419 files, will also be added.
420
421 If you want to use a different import list (for those rare modules wghere
422 import lists trigger different backend modules to be loaded for example),
423 you can use C<add_eval> instead:
424
425 $extractor->add_eval ("use Module qw(a b c)");
426
427 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
428 from the distribution they are part of.
429
430 $extractor->add_mod ("Coro", "AnyEvent::AIO");
431
432 =cut
433
434 sub add_mod {
435 my $self = shift;
436
437 for (@_) {
438 my $pkg = "libextractor" . ++$self->{count};
439 $self->_trace ("use $_", "{ package $pkg; use $_ }")
440 unless $self->{add_mod}{$_}++;
441 }
442 }
443
444 =item $extractor->add_bin ($name[, $name...])
445
446 Adds the given (perl) program(s) to the file set, that is, a program
447 installed by some perl module, written in perl (an example would be the
448 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
449 distribution).
450
451 Example: add the deliantra client program installed by the
452 L<Deliantra::Client> module and put it under F<bin/deliantra>.
453
454 $extractor->add_bin ("deliantra");
455
456 =cut
457
458 sub add_bin {
459 my $self = shift;
460
461 exe:
462 for my $exe (@_) {
463 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
464 if (open my $fh, "<:perlio", "$dir/$exe") {
465 if (-f $fh) {
466 my $file = do { local $/; readline $fh };
467
468 $self->_trace_flush if exists $self->{trace_check};
469 $self->{trace_check} = $file;
470
471 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
472 next exe;
473 }
474 }
475 }
476
477 croak "add_bin ($exe): executable not found";
478 }
479 }
480
481 =item $extractor->add_eval ($string)
482
483 Evaluates the string as perl code and adds all modules that are loaded
484 by it. For example, this would add L<AnyEvent> and the default backend
485 implementation module and event loop module:
486
487 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
488
489 Each code snippet will be executed in its own package and under C<use
490 strict>.
491
492 =cut
493
494 sub add_eval {
495 my ($self, $eval) = @_;
496
497 my $pkg = "libextractor" . ++$self->{count};
498 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
499 $self->_trace ($eval,
500 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
501 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
502 );
503 }
504
505 =back
506
507 =head2 OTHER METHODS FOR ADDING FILES
508
509 The following methods add commonly used files that are either not covered
510 by other methods or add commonly-used dependencies.
511
512 =over 4
513
514 =item $extractor->add_perl
515
516 Adds the perl binary itself to the file set, including the libperl dll, if
517 needed.
518
519 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
520 some F<dll/libperl.so.XXX>.
521
522 =cut
523
524 sub add_perl {
525 my ($self) = @_;
526
527 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
528
529 # on debian, we have the special case of a perl binary linked against
530 # a static libperl.a (which is not available), but the Config says to use
531 # a shared library, which is in the wrong directory, too (which breaks
532 # every other perl installation on the system - they are so stupid).
533
534 # that means we can't find the libperl.so, because dbeian actively breaks
535 # their perl install, and we don't need it. we work around this by silently
536 # not including the libperl if we cannot find it.
537
538 if ($Config{useshrplib} eq "true") {
539 my ($libperl, $libpath);
540
541 if ($^O eq "cygwin") {
542 $libperl = $Config{libperl};
543 $libpath = "$Config{binexp}/$libperl";
544 } elsif ($^O eq "MSWin32") {
545 ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
546 $libpath = "$Config{binexp}/$libperl";
547 } else {
548 $libperl = $Config{libperl};
549 $libpath = $self->{lib}{"CORE/$libperl"};
550 }
551
552 $self->{set}{"dll/$libperl"} = $libpath
553 if length $libpath && -e $libpath;
554 }
555 }
556
557 =item $extractor->add_core_support
558
559 Try to add modules and files needed to support commonly-used builtin
560 language features. For example to open a scalar for I/O you need the
561 L<PerlIO::scalar> module:
562
563 open $fh, "<", \$scalar
564
565 A number of regex and string features (e.g. C<ucfirst>) need some unicore
566 files, e.g.:
567
568 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
569
570 This call adds these files (simply by executing code similar to the above
571 code fragments).
572
573 Notable things that are missing are other PerlIO layers, such as
574 L<PerlIO::encoding>, and named character and character class matches.
575
576 =cut
577
578 sub add_core_support {
579 my ($self) = @_;
580
581 $self->add_eval ('my $v; open my $fh, "<", \$v');
582 $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
583 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
584 $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
585 }
586
587 =item $extractor->add_unicore
588
589 Adds (hopefully) all files from the unicore database that will ever be
590 needed.
591
592 If you are not sure which unicode character classes and similar unicore
593 databases you need, and you do not care about an extra one thousand(!)
594 files comprising 4MB of data, then you can just call this method, which
595 adds basically all files from perl's unicode database.
596
597 Note that C<add_core_support> also adds some unicore files, but it's not a
598 subset of C<add_unicore> - the former adds all files neccessary to support
599 core builtins (which includes some unicore files and other things), while
600 the latter adds all unicore files (but nothing else).
601
602 When in doubt, use both.
603
604 =cut
605
606 sub add_unicore {
607 my ($self) = @_;
608
609 $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
610 }
611
612 =item $extractor->add_core
613
614 This adds all files from the perl core distribution, that is, all library
615 files that come with perl.
616
617 This is a superset of C<add_core_support> and C<add_unicore>.
618
619 This is quite a lot, but on the plus side, you can be sure nothing is
620 missing.
621
622 This requires a full perl installation - Debian GNU/Linux doesn't package
623 the full perl library, so this function will not work there.
624
625 =cut
626
627 sub add_core {
628 my ($self) = @_;
629
630 my $lib = $self->{lib};
631
632 for (@{
633 $self->_read_packlist ($Config{privlibexp}, ".packlist")
634 }) {
635 $self->{set}{$_} ||= [
636 "lib/"
637 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
638 ];
639 }
640 }
641
642 =back
643
644 =head2 GLOB-BASED ADDING AND FILTERING
645
646 These methods add or manipulate files by using glob-based patterns.
647
648 These glob patterns work similarly to glob patterns in the shell:
649
650 =over 4
651
652 =item /
653
654 A F</> at the start of the pattern interprets the pattern as a file
655 path inside the file set, almost the same as in the shell. For example,
656 F</bin/perl*> would match all files whose names starting with F<perl>
657 inside the F<bin> directory in the set.
658
659 If the F</> is missing, then the pattern is interpreted as a module name
660 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
661 while F<Coro::*> would match F<lib/Coro/*.pm>.
662
663 =item *
664
665 A single star matches anything inside a single directory component. For
666 example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
667 F<lib/Coro/> directory, but not any files deeper in the hierarchy.
668
669 Another way to look at it is that a single star matches anything but a
670 slash (F</>).
671
672 =item **
673
674 A double star matches any number of characters in the path, including F</>.
675
676 For example, F<AnyEvent::**> would match all modules whose names start
677 with C<AnyEvent::>, no matter how deep in the hierarchy they are.
678
679 =back
680
681 =cut
682
683 sub _extglob2re {
684 for (quotemeta $_[1]) {
685 s/\\\*\\\*/.*/g;
686 s/\\\*/[^\/]*/g;
687 s/\\\?/[^\/]/g;
688
689 unless (s%^\\/%%) {
690 s%\\:\\:%/%g;
691 $_ = "lib/$_\\.pm";
692 }
693
694 $_ .= '$';
695 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
696
697 return qr<^$_>s
698 }
699 }
700
701 =over 4
702
703 =item $extractor->add_glob ($modglob[, $modglob...])
704
705 Adds all files from the perl library that match the given glob pattern.
706
707 For example, you could implement C<add_unicore> yourself like this:
708
709 $extractor->add_glob ("/unicore/**.pl");
710
711 =cut
712
713 sub add_glob {
714 my $self = shift;
715
716 for (@_) {
717 my $pat = $self->_extglob2re ($_);
718 $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
719 }
720 }
721
722 =item $extractor->filter ($pattern[, $pattern...])
723
724 Applies a series of include/exclude filters. Each filter must start with
725 either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
726 pattern. The rest of the pattern is a normal glob pattern.
727
728 An exclude pattern (C<->) instantly removes all matching files from
729 the set. An include pattern (C<+>) protects matching files from later
730 removals.
731
732 That is, if you have an include pattern then all files that were matched
733 by it will be included in the set, regardless of any further exclude
734 patterns matching the same files.
735
736 Likewise, any file excluded by a pattern will not be included in the set,
737 even if matched by later include patterns.
738
739 Any files not matched by any expression will simply stay in the set.
740
741 For example, to remove most of the useless autoload functions by the POSIX
742 module (they either do the same thing as a builtin or always raise an
743 error), you would use this:
744
745 $extractor->filter ("-/lib/auto/POSIX/*.al");
746
747 This does not remove all autoload files, only the ones not defined by a
748 subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
749
750 =cut
751
752 sub filter {
753 my ($self, @patterns) = @_;
754
755 $self->_trace_flush;
756
757 my $set = $self->{set};
758 my %include;
759
760 for my $pat (@patterns) {
761 $pat =~ s/^([+\-])//
762 or croak "$_: not a valid filter pattern (missing + or - prefix)";
763 my $inc = $1 eq "+";
764 $pat = $self->_extglob2re ($pat);
765
766 my @match = grep /$pat/, keys %$set;
767
768 if ($inc) {
769 @include{@match} = delete @$set{@match};
770 } else {
771 delete @$set{@{ $_->[I_DEP] }} # remove dependents
772 for delete @$set{@match};
773 }
774 }
775
776 my @include = keys %include;
777 @$set{@include} = delete @include{@include};
778 }
779
780 =item $extractor->runtime_only
781
782 This removes all files that are not needed at runtime, such as static
783 archives, header and other files needed only for compilation of modules,
784 and pod and html files (which are unlikely to be needed at runtime).
785
786 This is quite useful when you want to have only files actually needed to
787 execute a program.
788
789 =cut
790
791 sub runtime_only {
792 my ($self) = @_;
793
794 $self->_trace_flush;
795
796 my $set = $self->{set};
797
798 # delete all static libraries, also windows stuff
799 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
800
801 # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
802 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
803
804 # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
805 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
806
807 # delete unneeded unicore files
808 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
809 }
810
811 =back
812
813 =head2 RESULT SET
814
815 =over 4
816
817 =item $set = $extractor->set
818
819 Returns a hash reference that represents the result set. The hash is the
820 actual internal storage hash and can only be modified as described below.
821
822 Each key in the hash is the path inside the set, without a leading slash,
823 e.g.:
824
825 bin/perl
826 lib/unicore/lib/Blk/Superscr.pl
827 lib/AnyEvent/Impl/EV.pm
828
829 The value is an array reference with mostly unspecified contents, except
830 the first element, which is the file system path where the actual file can
831 be found.
832
833 This code snippet lists all files inside the set:
834
835 print "$_\n"
836 for sort keys %{ $extractor->set });
837
838 This code fragment prints C<< filesystem_path => set_path >> pairs for all
839 files in the set:
840
841 my $set = $extractor->set;
842 while (my ($set,$fspath) = each %$set) {
843 print "$fspath => $set\n";
844 }
845
846 You can implement your own filtering by asking for the result set with
847 C<< $extractor->set >>, and then deleting keys from the referenced hash
848 - since you can ask for the result set at any time you can add things,
849 filter them out this way, and add additional things.
850
851 =back
852
853 =cut
854
855 sub set {
856 $_[0]->_trace_flush;
857 $_[0]{set}
858 }
859
860 =head1 EXAMPLE
861
862 To package he deliantra client (L<Deliantra::Client>), finding all
863 (perl) files needed to run it is a first step. This can be done by using
864 something like the following code snippet:
865
866 my $ex = new Perl::LibExtractor;
867
868 $ex->add_perl;
869 $ex->add_core_support;
870 $ex->add_bin ("deliantra");
871 $ex->add_mod ("AnyEvent::Impl::EV");
872 $ex->add_mod ("AnyEvent::Impl::Perl");
873 $ex->add_mod ("Urlader");
874 $ex->filter ("-/*/auto/POSIX/**.al");
875 $ex->runtime_only;
876
877 First it sets the perl library directory to F<pm> and F<.> (the latter
878 to work around some AutoLoader bugs), so perl uses only the perl library
879 files that came with the binary package.
880
881 Then it sets some environment variable to override the system default
882 (which might be incompatible).
883
884 Then it runs the client itself, using C<require>. Since C<require> only
885 looks in the perl library directory this is the reaosn why the scripts
886 were put there (of course, since F<.> is also included it doesn't matter,
887 but I refuse to yield to bugs).
888
889 Finally it exits with a clean status to signal "ok" to Urlader.
890
891 Back to the original C<Perl::LibExtractor> script: after initialising a
892 new set, the script simply adds the F<perl> interpreter and core support
893 files (just in case, not all are needed, but some are, and I am too lazy
894 to find out which ones exactly).
895
896 Then it adds the deliantra executable itself, which in turn adds most of
897 the required modules. After that, the AnyEvent implementation modules are
898 added because these dependencies are not picked up automatically.
899
900 The L<Urlader> module is added because the client itself does not depend
901 on it at all, but the wrapper does.
902
903 At this point, all required files are present, and it's time to slim
904 down: most of the ueseless POSIX autoloaded functions are removed,
905 not because they are so big, but because creating files is a costly
906 operation in itself, so even small fiels have considerable overhead when
907 unpacking. Then files not required for running the client are removed.
908
909 And that concludes it, the set is now ready.
910
911 =head1 SEE ALSO
912
913 The utility program that comes with this module: L<perl-libextract>.
914
915 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
916
917 =head1 LICENSE
918
919 This software package is licensed under the GPL version 3 or any later
920 version, see COPYING for details.
921
922 This license does not, of course, apply to any output generated by this
923 software.
924
925 =head1 AUTHOR
926
927 Marc Lehmann <schmorp@schmorp.de>
928 http://home.schmorp.de/
929
930 =cut
931
932 1;
933