ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.15
Committed: Thu Jan 26 01:24:25 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.14: +14 -15 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set
25 of files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =cut
33
34 package Perl::LibExtractor;
35
36 our $VERSION = '0.1';
37
38 use Config;
39 use File::Spec ();
40 use File::Temp ();
41
42 use common::sense;
43
44 sub I_SRC () { 0 }
45 sub I_DEP () { 1 }
46
47 sub croak($) {
48 require Carp;
49 Carp::croak "(Perl::LibExtractor) $_[0]";
50 }
51
52 my $canonpath = File::Spec->can ("canonpath");
53 my $case_tolerant = File::Spec->case_tolerant;
54
55 sub canonpath($) {
56 local $_ = $canonpath->(File::Spec::, $_[0]);
57 s%\\%/%g;
58 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 $_
60 }
61
62 =head2 CREATION
63
64 =over 4
65
66 =item $extractor = new Perl::LibExtractor [key => value...]
67
68 Creates a new extractor object. Each extractor object stores some
69 configuration options and a subset of files that can be queried at any
70 time,.
71
72 Binary executables (such as the perl interpreter) are stored inside
73 F<bin/>, perl scripts are stored under F<script/>, perl library files are
74 stored under F<lib/> and shared libraries are stored under F<dll/>.
75
76 The following key-value pairs exist, with default values as specified.
77
78 =over 4
79
80 =item inc => \@INC without "."
81
82 An arrayref with paths to perl library directories. The default is
83 C<\@INC>, with F<.> removed.
84
85 To prepend custom dirs just do this:
86
87 inc => ["mydir", @INC],
88
89 =item use_packlist => 1
90
91 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 then each time a file is traced, the complete distribution that contains
93 it is included (but not traced).
94
95 If disabled, only shared objects and autoload files will be added.
96
97 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98 this option will fail. Other perls should be fine.
99
100 =back
101
102 =cut
103
104 sub new {
105 my ($class, %kv) = @_;
106
107 my $self = bless {
108 inc => [grep $_ ne ".", @INC],
109 use_packlist => 1,
110 %kv,
111 set => {},
112 }, $class;
113
114 my %inc_seen;
115 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
116 $self->{inc} = \@inc;
117
118 # maybe not inc, but these?
119 # sitearchexp
120 # sitelib
121 # vendorarchexp
122 # vendorlibexp
123 # archlibexp
124 # privlibexp
125
126 $self->_set_inc;
127
128 $self
129 }
130
131 sub _perl_path() {
132 my $secure_perl_path = $Config{perlpath};
133
134 if ($^O ne 'VMS') {
135 $secure_perl_path .= $Config{_exe}
136 unless $secure_perl_path =~ m/$Config{_exe}$/i;
137 }
138
139 $secure_perl_path
140 }
141
142 sub _path2match {
143 my $re = join "|", map "\Q$_", @_;
144
145 $re = "(?:$re)\\/";
146 $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
147
148 $case_tolerant
149 ? qr<$re>i
150 : qr<$re>
151 }
152
153 sub _read_packlist {
154 my ($self, $path) = @_;
155
156 my $pfxmatch = $self->{pfxmatch};
157 my $lib = $self->{lib};
158
159 my @packlist;
160
161 open my $fh, "<:perlio", $path
162 or die "$path: $!";
163
164 while (<$fh>) {
165 chomp;
166 s/ .*$//; # newer-style .packlists might contain key=value pairs
167
168 s/$pfxmatch// and exists $lib->{$_}
169 or next;
170
171 push @packlist, canonpath $_;
172 }
173
174 \@packlist
175 }
176
177 sub _set_inc {
178 my ($self) = @_;
179
180 my $pfxmatch = _path2match @{ $self->{inc }};
181
182 my %lib;
183 my @packlists;
184
185 # find all files in all libdirs, earlier ones overwrite later ones
186 my @scan = map [$_, ""], @{ $self->{inc} };
187
188 while (@scan) {
189 my ($root, $dir) = @{ pop @scan };
190
191 my $pfx = length $dir ? "$dir/" : "";
192
193 for (do {
194 opendir my $fh, "$root/$dir"
195 or croak "$root/$dir: $!";
196 grep !/^\.\.?$/, readdir $fh
197 }) {
198 if (-d "$root/$dir/$_/.") {
199 $lib{"$pfx$_/"} = "$root/$pfx$_";
200 push @scan, [$root, "$pfx$_"];
201 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
202 push @packlists, "$root/$pfx.packlist";
203 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
204 # skip empty .bs files
205 # } elsif (/\.(?:pod|h|html)$/) {
206 # # not interested in those
207 } else {
208 #push @files, $_;
209 $lib{"$pfx$_"} = "$root/$pfx$_";
210 }
211 }
212
213 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
214 }
215
216 $self->{lib} = \%lib;
217 $self->{pfxmatch} = $pfxmatch;
218
219 my %packlist;
220
221 # need to go forward here
222 for (@packlists) {
223 my $packlist = $self->_read_packlist ($_);
224
225 $packlist{$_} = $packlist
226 for @$packlist;
227 }
228
229 $self->{packlist} = \%packlist;
230 }
231
232 =back
233
234 =head2 TRACE/PACKLIST BASED ADDING
235
236 The following methods add various things to the set of files.
237
238 Each time a perl file is added, it is scanned by tracing either loading,
239 execution or compiling it, and seeing which other perl modules and
240 libraries have been loaded.
241
242 For each library file found this way, additional dependencies are added:
243 if packlists are enabled, then all files of the distribution that contains
244 the file will be added. If packlists are disabled, then only shared
245 objects and autoload files for modules will be added.
246
247 Only files from perl library directories will be added automatically. Any
248 other files (such as manpages or scripts installed in the F<bin>
249 directory) are skipped.
250
251 If there is an error, such as a module not being found, then this module
252 croaks (as opposed to silently skipping). If you want to add something of
253 which you are not sure it exists, then you can wrap the call into C<eval
254 {}>. In some cases, you can avoid this by executing the code you want
255 to work later using C<add_eval> - see C<add_core_support> for an actual
256 example of this technique.
257
258 Note that packlists are meant to add files not covered by other
259 mechanisms, such as resource files and other data files loaded directly by
260 a module - they are not meant to add dependencies that are missed because
261 they only happen at runtime.
262
263 For example, with packlists, when using L<AnyEvent>, then all event loop
264 backends are automatically added as well, but I<not> any event loops
265 (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
266 packlists, only the backend that is being used is added (i.e. normally
267 none, as loading AnyEvent does not instantly load any backend).
268
269 To catch the extra event loop dependencies, you can either initialise
270 AnyEvent so it picks a suitable backend:
271
272 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
273
274 Or you can directly load the backend modules you plan to use:
275
276 $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
277
278 An example of a program (or module) that has extra resource files is
279 L<Deliantra::Client> - the normal tracing (without packlist usage) will
280 correctly add all submodules, but miss the fonts and textures. By using
281 the packlist, those files are added correctly.
282
283 =over 4
284
285 =cut
286
287 sub _add {
288 my ($self, $add) = @_;
289
290 my $lib = $self->{lib};
291 my $path;
292
293 for (@$add) {
294 $path = "lib/$_";
295
296 $self->{set}{$path} ||= do {
297 my @info;
298
299 $info[I_SRC] = $lib->{$_}
300 or croak "$_: unable to locate file in perl library";
301
302 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
303 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
304 for @{ $self->{packlist}{$_} };
305
306 # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
307 # s/\.pm$//;
308 # s%/%::%g;
309 # my $pkg = "libextractor" . ++$self->{count};
310 # $self->add_eval ("{ package $pkg; eval 'use $_' }")
311 # unless $self->{_add_do}{$_}++;
312 # }
313 #
314 # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
315 # for grep /\.pl$/, @{ $self->{packlist}{$_} };
316
317 } elsif (/^(.*)\.pm$/) {
318 (my $auto = "auto/$1/") =~ s%::%/%g;
319 $auto =~ m%/([^/]+)/$% or die;
320 my $base = $1;
321
322 if (exists $lib->{$auto}) {
323 # auto dir exists, scan it for cool stuff
324
325 # 1. shared object, others are of no interest to us
326 my $so = "$auto$base.$Config{dlext}";
327 if (my $src = $lib->{$so}) {
328 $so = "lib/$so";
329 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
330 }
331
332 # 2. autoloader/autosplit
333 my $ix = "${auto}autosplit.ix";
334 if (my $src = $lib->{$ix}) {
335 $ix = "lib/$ix";
336 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
337
338 open my $fh, "<:perlio", $src
339 or croak "$src: $!";
340
341 my $package;
342
343 while (<$fh>) {
344 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
345 my $al = "auto/$package/$1.al";
346 my $src = $lib->{$al}
347 or croak "$al: autoload file not found, but should be there.";
348
349 $al = "lib/$al";
350 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
351
352 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
353 ($package = $1) =~ s/::/\//g;
354 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
355 # nop
356 } else {
357 warn "WARNING: $src: unparsable line, please report: $_";
358 }
359 }
360 }
361
362 skip:
363 }
364 }
365
366 \@info
367 };
368 }
369 }
370
371 sub _trace {
372 my ($self, $file, $eval) = @_;
373
374 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
375 }
376
377 sub _trace_flush {
378 my ($self) = @_;
379
380 # ->_add might add additional files to trace
381 while (exists $self->{trace_begin} or exists $self->{trace_check}) {
382 my $tmpdir = newdir File::Temp;
383 my $dir = $tmpdir->dirname;
384
385 open my $fh, ">:perlio", "$dir/eval"
386 or croak "$dir/eval: $!";
387 syswrite $fh,
388 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
389 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
390 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
391 . "CHECK {\n"
392 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
393 . 'print join "\x00", values %INC;'
394 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
395 . "}\n"
396 . (delete $self->{trace_check});
397 close $fh;
398
399 system _perl_path, "-c", "$dir/eval"
400 and croak "trace failure, check trace process output - caught";
401
402 my @inc = split /\x00/, do {
403 open my $fh, "<:perlio", "$dir/out"
404 or croak "$dir/out: $!";
405 local $/;
406 scalar readline $fh
407 };
408
409 my $pfxmatch = $self->{pfxmatch};
410
411 # remove the library directory prefix, hope for the best
412 s/$pfxmatch//
413 or croak "$_: file outside any library directory"
414 for @inc;
415
416 $self->_add (\@inc);
417 }
418 }
419
420 =item $extractor->add_mod ($module[, $module...])
421
422 Adds the given module(s) to the file set - the module name must be specified
423 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
424
425 The program will be loaded with the default import list, any dependent
426 files, such as the shared object implementing xs functions, or autoload
427 files, will also be added.
428
429 If you want to use a different import list (for those rare modules wghere
430 import lists trigger different backend modules to be loaded for example),
431 you can use C<add_eval> instead:
432
433 $extractor->add_eval ("use Module qw(a b c)");
434
435 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
436 from the distribution they are part of.
437
438 $extractor->add_mod ("Coro", "AnyEvent::AIO");
439
440 =cut
441
442 sub add_mod {
443 my $self = shift;
444
445 for (@_) {
446 my $pkg = "libextractor" . ++$self->{count};
447 $self->_trace ("use $_", "{ package $pkg; use $_ }")
448 unless $self->{add_mod}{$_}++;
449 }
450 }
451
452 =item $extractor->add_bin ($name[, $name...])
453
454 Adds the given (perl) program(s) to the file set, that is, a program
455 installed by some perl module, written in perl (an example would be the
456 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
457 distribution).
458
459 Example: add the deliantra client program installed by the
460 L<Deliantra::Client> module and put it under F<bin/deliantra>.
461
462 $extractor->add_bin ("deliantra");
463
464 =cut
465
466 sub add_bin {
467 my $self = shift;
468
469 exe:
470 for my $exe (@_) {
471 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
472 if (open my $fh, "<:perlio", "$dir/$exe") {
473 if (-f $fh) {
474 my $file = do { local $/; readline $fh };
475
476 $self->_trace_flush if exists $self->{trace_check};
477 $self->{trace_check} = $file;
478
479 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
480 next exe;
481 }
482 }
483 }
484
485 croak "add_bin ($exe): executable not found";
486 }
487 }
488
489 =item $extractor->add_eval ($string)
490
491 Evaluates the string as perl code and adds all modules that are loaded
492 by it. For example, this would add L<AnyEvent> and the default backend
493 implementation module and event loop module:
494
495 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
496
497 Each code snippet will be executed in its own package and under C<use
498 strict>.
499
500 =cut
501
502 sub add_eval {
503 my ($self, $eval) = @_;
504
505 my $pkg = "libextractor" . ++$self->{count};
506 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
507 $self->_trace ($eval,
508 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
509 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
510 );
511 }
512
513 =back
514
515 =head2 OTHER METHODS FOR ADDING FILES
516
517 The following methods add commonly used files that are either not covered
518 by other methods or add commonly-used dependencies.
519
520 =over 4
521
522 =item $extractor->add_perl
523
524 Adds the perl binary itself to the file set, including the libperl dll, if
525 needed.
526
527 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
528 some F<dll/libperl.so.XXX>.
529
530 =cut
531
532 sub add_perl {
533 my ($self) = @_;
534
535 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
536
537 # on debian, we have the special case of a perl binary linked against
538 # a static libperl.a (which is not available), but the Config says to use
539 # a shared library, which is in the wrong directory, too (which breaks
540 # every other perl installation on the system - they are so stupid).
541
542 # that means we can't find the libperl.so, because dbeian actively breaks
543 # their perl install, and we don't need it. we work around this by silently
544 # not including the libperl if we cannot find it.
545
546 if ($Config{useshrplib} eq "true") {
547 my ($libperl, $libpath);
548
549 if ($^O eq "cygwin") {
550 $libperl = $Config{libperl};
551 $libpath = "$Config{binexp}/$libperl";
552 } elsif ($^O eq "MSWin32") {
553 ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
554 $libpath = "$Config{binexp}/$libperl";
555 } else {
556 $libperl = $Config{libperl};
557 $libpath = $self->{lib}{"CORE/$libperl"};
558 }
559
560 $self->{set}{"dll/$libperl"} = $libpath
561 if length $libpath && -e $libpath;
562 }
563 }
564
565 =item $extractor->add_core_support
566
567 Try to add modules and files needed to support commonly-used builtin
568 language features. For example to open a scalar for I/O you need the
569 L<PerlIO::scalar> module:
570
571 open $fh, "<", \$scalar
572
573 A number of regex and string features (e.g. C<ucfirst>) need some unicore
574 files, e.g.:
575
576 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
577
578 This call adds these files (simply by executing code similar to the above
579 code fragments).
580
581 Notable things that are missing are other PerlIO layers, such as
582 L<PerlIO::encoding>, and named character and character class matches.
583
584 =cut
585
586 sub add_core_support {
587 my ($self) = @_;
588
589 $self->add_eval ('my $v; open my $fh, "<", \$v');
590 $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
591 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
592 $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
593 }
594
595 =item $extractor->add_unicore
596
597 Adds (hopefully) all files from the unicore database that will ever be
598 needed.
599
600 If you are not sure which unicode character classes and similar unicore
601 databases you need, and you do not care about an extra one thousand(!)
602 files comprising 4MB of data, then you can just call this method, which
603 adds basically all files from perl's unicode database.
604
605 Note that C<add_core_support> also adds some unicore files, but it's not a
606 subset of C<add_unicore> - the former adds all files neccessary to support
607 core builtins (which includes some unicore files and other things), while
608 the latter adds all unicore files (but nothing else).
609
610 When in doubt, use both.
611
612 =cut
613
614 sub add_unicore {
615 my ($self) = @_;
616
617 $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
618 }
619
620 =item $extractor->add_core
621
622 This adds all files from the perl core distribution, that is, all library
623 files that come with perl.
624
625 This is a superset of C<add_core_support> and C<add_unicore>.
626
627 This is quite a lot, but on the plus side, you can be sure nothing is
628 missing.
629
630 This requires a full perl installation - Debian GNU/Linux doesn't package
631 the full perl library, so this function will not work there.
632
633 =cut
634
635 sub add_core {
636 my ($self) = @_;
637
638 my $lib = $self->{lib};
639
640 for (@{
641 $self->_read_packlist (".packlist")
642 }) {
643 $self->{set}{$_} ||= [
644 "lib/"
645 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
646 ];
647 }
648 }
649
650 =back
651
652 =head2 GLOB-BASED ADDING AND FILTERING
653
654 These methods add or manipulate files by using glob-based patterns.
655
656 These glob patterns work similarly to glob patterns in the shell:
657
658 =over 4
659
660 =item /
661
662 A F</> at the start of the pattern interprets the pattern as a file
663 path inside the file set, almost the same as in the shell. For example,
664 F</bin/perl*> would match all files whose names starting with F<perl>
665 inside the F<bin> directory in the set.
666
667 If the F</> is missing, then the pattern is interpreted as a module name
668 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
669 while F<Coro::*> would match F<lib/Coro/*.pm>.
670
671 =item *
672
673 A single star matches anything inside a single directory component. For
674 example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
675 F<lib/Coro/> directory, but not any files deeper in the hierarchy.
676
677 Another way to look at it is that a single star matches anything but a
678 slash (F</>).
679
680 =item **
681
682 A double star matches any number of characters in the path, including F</>.
683
684 For example, F<AnyEvent::**> would match all modules whose names start
685 with C<AnyEvent::>, no matter how deep in the hierarchy they are.
686
687 =back
688
689 =cut
690
691 sub _extglob2re {
692 for (quotemeta $_[1]) {
693 s/\\\*\\\*/.*/g;
694 s/\\\*/[^\/]*/g;
695 s/\\\?/[^\/]/g;
696
697 unless (s%^\\/%%) {
698 s%\\:\\:%/%g;
699 $_ = "lib/$_\\.pm";
700 }
701
702 $_ .= '$';
703 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
704
705 return qr<^$_>s
706 }
707 }
708
709 =over 4
710
711 =item $extractor->add_glob ($modglob[, $modglob...])
712
713 Adds all files from the perl library that match the given glob pattern.
714
715 For example, you could implement C<add_unicore> yourself like this:
716
717 $extractor->add_glob ("/unicore/**.pl");
718
719 =cut
720
721 sub add_glob {
722 my $self = shift;
723
724 for (@_) {
725 my $pat = $self->_extglob2re ($_);
726 $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
727 }
728 }
729
730 =item $extractor->filter ($pattern[, $pattern...])
731
732 Applies a series of include/exclude filters. Each filter must start with
733 either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
734 pattern. The rest of the pattern is a normal glob pattern.
735
736 An exclude pattern (C<->) instantly removes all matching files from
737 the set. An include pattern (C<+>) protects matching files from later
738 removals.
739
740 That is, if you have an include pattern then all files that were matched
741 by it will be included in the set, regardless of any further exclude
742 patterns matching the same files.
743
744 Likewise, any file excluded by a pattern will not be included in the set,
745 even if matched by later include patterns.
746
747 Any files not matched by any expression will simply stay in the set.
748
749 For example, to remove most of the useless autoload functions by the POSIX
750 module (they either do the same thing as a builtin or always raise an
751 error), you would use this:
752
753 $extractor->filter ("-/lib/auto/POSIX/*.al");
754
755 This does not remove all autoload files, only the ones not defined by a
756 subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
757
758 =cut
759
760 sub filter {
761 my ($self, @patterns) = @_;
762
763 $self->_trace_flush;
764
765 my $set = $self->{set};
766 my %include;
767
768 for my $pat (@patterns) {
769 $pat =~ s/^([+\-])//
770 or croak "$_: not a valid filter pattern (missing + or - prefix)";
771 my $inc = $1 eq "+";
772 $pat = $self->_extglob2re ($pat);
773
774 my @match = grep /$pat/, keys %$set;
775
776 if ($inc) {
777 @include{@match} = delete @$set{@match};
778 } else {
779 delete @$set{@{ $_->[I_DEP] }} # remove dependents
780 for delete @$set{@match};
781 }
782 }
783
784 my @include = keys %include;
785 @$set{@include} = delete @include{@include};
786 }
787
788 =item $extractor->runtime_only
789
790 This removes all files that are not needed at runtime, such as static
791 archives, header and other files needed only for compilation of modules,
792 and pod and html files (which are unlikely to be needed at runtime).
793
794 This is quite useful when you want to have only files actually needed to
795 execute a program.
796
797 =cut
798
799 sub runtime_only {
800 my ($self) = @_;
801
802 $self->_trace_flush;
803
804 my $set = $self->{set};
805
806 # delete all static libraries, also windows stuff
807 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
808
809 # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
810 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
811
812 # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
813 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
814
815 # delete unneeded unicore files
816 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
817 }
818
819 =back
820
821 =head2 RESULT SET
822
823 =over 4
824
825 =item $set = $extractor->set
826
827 Returns a hash reference that represents the result set. The hash is the
828 actual internal storage hash and can only be modified as described below.
829
830 Each key in the hash is the path inside the set, without a leading slash,
831 e.g.:
832
833 bin/perl
834 lib/unicore/lib/Blk/Superscr.pl
835 lib/AnyEvent/Impl/EV.pm
836
837 The value is an array reference with mostly unspecified contents, except
838 the first element, which is the file system path where the actual file can
839 be found.
840
841 This code snippet lists all files inside the set:
842
843 print "$_\n"
844 for sort keys %{ $extractor->set });
845
846 This code fragment prints C<< filesystem_path => set_path >> pairs for all
847 files in the set:
848
849 my $set = $extractor->set;
850 while (my ($set,$fspath) = each %$set) {
851 print "$fspath => $set\n";
852 }
853
854 You can implement your own filtering by asking for the result set with
855 C<< $extractor->set >>, and then deleting keys from the referenced hash
856 - since you can ask for the result set at any time you can add things,
857 filter them out this way, and add additional things.
858
859 =back
860
861 =cut
862
863 sub set {
864 $_[0]->_trace_flush;
865 $_[0]{set}
866 }
867
868 =head1 EXAMPLE
869
870 To package he deliantra client (L<Deliantra::Client>), finding all
871 (perl) files needed to run it is a first step. This can be done by using
872 something like the following code snippet:
873
874 my $ex = new Perl::LibExtractor;
875
876 $ex->add_perl;
877 $ex->add_core_support;
878 $ex->add_bin ("deliantra");
879 $ex->add_mod ("AnyEvent::Impl::EV");
880 $ex->add_mod ("AnyEvent::Impl::Perl");
881 $ex->add_mod ("Urlader");
882 $ex->filter ("-/*/auto/POSIX/**.al");
883 $ex->runtime_only;
884
885 First it sets the perl library directory to F<pm> and F<.> (the latter
886 to work around some AutoLoader bugs), so perl uses only the perl library
887 files that came with the binary package.
888
889 Then it sets some environment variable to override the system default
890 (which might be incompatible).
891
892 Then it runs the client itself, using C<require>. Since C<require> only
893 looks in the perl library directory this is the reaosn why the scripts
894 were put there (of course, since F<.> is also included it doesn't matter,
895 but I refuse to yield to bugs).
896
897 Finally it exits with a clean status to signal "ok" to Urlader.
898
899 Back to the original C<Perl::LibExtractor> script: after initialising a
900 new set, the script simply adds the F<perl> interpreter and core support
901 files (just in case, not all are needed, but some are, and I am too lazy
902 to find out which ones exactly).
903
904 Then it adds the deliantra executable itself, which in turn adds most of
905 the required modules. After that, the AnyEvent implementation modules are
906 added because these dependencies are not picked up automatically.
907
908 The L<Urlader> module is added because the client itself does not depend
909 on it at all, but the wrapper does.
910
911 At this point, all required files are present, and it's time to slim
912 down: most of the ueseless POSIX autoloaded functions are removed,
913 not because they are so big, but because creating files is a costly
914 operation in itself, so even small fiels have considerable overhead when
915 unpacking. Then files not required for running the client are removed.
916
917 And that concludes it, the set is now ready.
918
919 =head1 SEE ALSO
920
921 The utility program that comes with this module: L<perl-libextract>.
922
923 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
924
925 =head1 LICENSE
926
927 This software package is licensed under the GPL version 3 or any later
928 version, see COPYING for details.
929
930 This license does not, of course, apply to any output generated by this
931 software.
932
933 =head1 AUTHOR
934
935 Marc Lehmann <schmorp@schmorp.de>
936 http://home.schmorp.de/
937
938 =cut
939
940 1;
941