ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/cvsroot/Perl-LibExtractor/LibExtractor.pm
Revision: 1.18
Committed: Fri Jan 27 00:27:25 2012 UTC (12 years, 5 months ago) by root
Branch: MAIN
Changes since 1.17: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::LibExtractor - determine perl library subsets for building distributions
4
5 =head1 SYNOPSIS
6
7 use Perl::LibExtractor;
8
9 =head1 DESCRIPTION
10
11 The purpose of this module is to determine subsets of your perl library,
12 that is, a set of files needed to satisfy certain dependencies (e.g. of a
13 program).
14
15 The goal is to extract a part of your perl installation including
16 dependencies. A typical use case for this module would be to find out
17 which files are needed to be build a L<PAR> distribution, to link into
18 an L<App::Staticperl> binary, or to pack with L<Urlader>, to create
19 stand-alone distributions tailormade to run your app.
20
21 =head1 METHODS
22
23 To use this module, first call the C<new>-constructor and then as many
24 other methods as you want, to generate a set of files. Then query the set
25 of files and do whatever you want with them.
26
27 The command-line utility F<perl-libextract> can be a convenient
28 alternative to using this module directly, and offers a few extra options,
29 such as to copy out the files into a new directory, strip them and/or
30 manipulate them in other ways.
31
32 =cut
33
34 package Perl::LibExtractor;
35
36 our $VERSION = '0.1';
37
38 use Config;
39 use File::Spec ();
40 use File::Temp ();
41
42 use common::sense;
43
44 sub I_SRC () { 0 }
45 sub I_DEP () { 1 }
46
47 sub croak($) {
48 require Carp;
49 Carp::croak "(Perl::LibExtractor) $_[0]";
50 }
51
52 my $canonpath = File::Spec->can ("canonpath");
53 my $case_tolerant = File::Spec->case_tolerant;
54
55 sub canonpath($) {
56 local $_ = $canonpath->(File::Spec::, $_[0]);
57 s%\\%/%g;
58 # $_ = lc if $case_tolerant; # we assume perl file name case is always the same
59 $_
60 }
61
62 =head2 CREATION
63
64 =over 4
65
66 =item $extractor = new Perl::LibExtractor [key => value...]
67
68 Creates a new extractor object. Each extractor object stores some
69 configuration options and a subset of files that can be queried at any
70 time,.
71
72 Binary executables (such as the perl interpreter) are stored inside
73 F<bin/>, perl scripts are stored under F<script/>, perl library files are
74 stored under F<lib/> and shared libraries are stored under F<dll/>.
75
76 The following key-value pairs exist, with default values as specified.
77
78 =over 4
79
80 =item inc => \@INC without "."
81
82 An arrayref with paths to perl library directories. The default is
83 C<\@INC>, with F<.> removed.
84
85 To prepend custom dirs just do this:
86
87 inc => ["mydir", @INC],
88
89 =item use_packlist => 1
90
91 Enable (if true) or disable the use of C<.packlist> files. If enabled,
92 then each time a file is traced, the complete distribution that contains
93 it is included (but not traced).
94
95 If disabled, only shared objects and autoload files will be added.
96
97 Debian GNU/Linux doesn't completely package perl or any perl modules, so
98 this option will fail. Other perls should be fine.
99
100 =item extra_deps => { file => [files...] }
101
102 Some (mainly runtime dependencies in the perl core library) cannot be
103 detected automatically by this module, especially if you don't use
104 packlists and C<add_core>.
105
106 This module comes with a set of default dependencies (such as L<Carp>
107 requiring L<Carp::Heavy>), which you cna override with this parameter.
108
109 To see the default set of dependencies that come with this module, use
110 this:
111
112 perl -MPerl::LibExtractor -MData::Dumper -e 'print Dumper $Perl::LibExtractor::EXTRA_DEPS'
113
114 =back
115
116 =cut
117
118 our $EXTRA_DEPS = {
119 'bytes.pm' => ['bytes_heavy.pl'],
120 'utf8.pm' => ['utf8_heavy.pl'],
121 'Config.pm' => ['Config_heavy.pl', 'Config_git.pl'],
122 'Carp.pm' => ['Carp/Heavy.pm'],
123 'Exporter.pm' => ['Exporter/Heavy.pm'],
124 };
125
126 sub new {
127 my ($class, %kv) = @_;
128
129 my $self = bless {
130 inc => [grep $_ ne ".", @INC],
131 use_packlist => 1,
132 extra_deps => $EXTRA_DEPS,
133 %kv,
134 set => {},
135 }, $class;
136
137 my %inc_seen;
138 my @inc = grep !$inc_seen{$_}++ && -d "$_/.", @{ $self->{inc} };
139 $self->{inc} = \@inc;
140
141 # maybe not inc, but these?
142 # sitearchexp
143 # sitelib
144 # vendorarchexp
145 # vendorlibexp
146 # archlibexp
147 # privlibexp
148
149 $self->_set_inc;
150
151 $self
152 }
153
154 sub _perl_path() {
155 my $secure_perl_path = $Config{perlpath};
156
157 if ($^O ne 'VMS') {
158 $secure_perl_path .= $Config{_exe}
159 unless $secure_perl_path =~ m/$Config{_exe}$/i;
160 }
161
162 $secure_perl_path
163 }
164
165 sub _path2match {
166 my $re = join "|", map "\Q$_", @_;
167
168 $re = "(?:$re)\\/";
169 $re =~ s%\\[/\\]%[/\\\\]%g; # we support / and \ on all OSes, keep your fingers crossed
170
171 $case_tolerant
172 ? qr<$re>i
173 : qr<$re>
174 }
175
176 sub _read_packlist {
177 my ($self, $path) = @_;
178
179 my $pfxmatch = $self->{pfxmatch};
180 my $lib = $self->{lib};
181
182 my @packlist;
183
184 open my $fh, "<:perlio", $path
185 or die "$path: $!";
186
187 while (<$fh>) {
188 chomp;
189 s/ .*$//; # newer-style .packlists might contain key=value pairs
190
191 s/$pfxmatch// and exists $lib->{$_}
192 or next;
193
194 push @packlist, canonpath $_;
195 }
196
197 \@packlist
198 }
199
200 sub _set_inc {
201 my ($self) = @_;
202
203 my $pfxmatch = _path2match @{ $self->{inc }};
204
205 my %lib;
206 my @packlists;
207
208 # find all files in all libdirs, earlier ones overwrite later ones
209 my @scan = map [$_, ""], @{ $self->{inc} };
210
211 while (@scan) {
212 my ($root, $dir) = @{ pop @scan };
213
214 my $pfx = length $dir ? "$dir/" : "";
215
216 for (do {
217 opendir my $fh, "$root/$dir"
218 or croak "$root/$dir: $!";
219 grep !/^\.\.?$/, readdir $fh
220 }) {
221 if (-d "$root/$dir/$_/.") {
222 $lib{"$pfx$_/"} = "$root/$pfx$_";
223 push @scan, [$root, "$pfx$_"];
224 } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) {
225 push @packlists, "$root/$pfx.packlist";
226 } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") {
227 # skip empty .bs files
228 # } elsif (/\.(?:pod|h|html)$/) {
229 # # not interested in those
230 } else {
231 #push @files, $_;
232 $lib{"$pfx$_"} = "$root/$pfx$_";
233 }
234 }
235
236 #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite
237 }
238
239 $self->{lib} = \%lib;
240 $self->{pfxmatch} = $pfxmatch;
241
242 my %packlist;
243
244 # need to go forward here
245 for (@packlists) {
246 my $packlist = $self->_read_packlist ($_);
247
248 $packlist{$_} = $packlist
249 for @$packlist;
250 }
251
252 $self->{packlist} = \%packlist;
253 }
254
255 =back
256
257 =head2 TRACE/PACKLIST BASED ADDING
258
259 The following methods add various things to the set of files.
260
261 Each time a perl file is added, it is scanned by tracing either loading,
262 execution or compiling it, and seeing which other perl modules and
263 libraries have been loaded.
264
265 For each library file found this way, additional dependencies are added:
266 if packlists are enabled, then all files of the distribution that contains
267 the file will be added. If packlists are disabled, then only shared
268 objects and autoload files for modules will be added.
269
270 Only files from perl library directories will be added automatically. Any
271 other files (such as manpages or scripts installed in the F<bin>
272 directory) are skipped.
273
274 If there is an error, such as a module not being found, then this module
275 croaks (as opposed to silently skipping). If you want to add something of
276 which you are not sure it exists, then you can wrap the call into C<eval
277 {}>. In some cases, you can avoid this by executing the code you want
278 to work later using C<add_eval> - see C<add_core_support> for an actual
279 example of this technique.
280
281 Note that packlists are meant to add files not covered by other
282 mechanisms, such as resource files and other data files loaded directly by
283 a module - they are not meant to add dependencies that are missed because
284 they only happen at runtime.
285
286 For example, with packlists, when using L<AnyEvent>, then all event loop
287 backends are automatically added as well, but I<not> any event loops
288 (i.e. L<AnyEvent::Impl::POE> is added, but L<POE> itself is not). Without
289 packlists, only the backend that is being used is added (i.e. normally
290 none, as loading AnyEvent does not instantly load any backend).
291
292 To catch the extra event loop dependencies, you can either initialise
293 AnyEvent so it picks a suitable backend:
294
295 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
296
297 Or you can directly load the backend modules you plan to use:
298
299 $extractor->add_mod ("AnyEvent::Impl::EV", "AnyEvent::Impl::Perl");
300
301 An example of a program (or module) that has extra resource files is
302 L<Deliantra::Client> - the normal tracing (without packlist usage) will
303 correctly add all submodules, but miss the fonts and textures. By using
304 the packlist, those files are added correctly.
305
306 =over 4
307
308 =cut
309
310 sub _add {
311 my ($self, $add) = @_;
312
313 my $lib = $self->{lib};
314 my $path;
315
316 for (@$add) {
317 $path = "lib/$_";
318
319 $self->{set}{$path} ||= do {
320 my @info;
321
322 $info[I_SRC] = $lib->{$_}
323 or croak "$_: unable to locate file in perl library";
324
325 if ($self->{use_packlist} && exists $self->{packlist}{$_}) {
326 $self->{set}{"lib/$_"} ||= [$self->{lib}{$_} or die]
327 for @{ $self->{packlist}{$_} };
328
329 # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) {
330 # s/\.pm$//;
331 # s%/%::%g;
332 # my $pkg = "libextractor" . ++$self->{count};
333 # $self->add_eval ("{ package $pkg; eval 'use $_' }")
334 # unless $self->{_add_do}{$_}++;
335 # }
336 #
337 # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00")
338 # for grep /\.pl$/, @{ $self->{packlist}{$_} };
339
340 } elsif (/^(.*)\.pm$/) {
341 (my $auto = "auto/$1/") =~ s%::%/%g;
342 $auto =~ m%/([^/]+)/$% or die;
343 my $base = $1;
344
345 if (exists $lib->{$auto}) {
346 # auto dir exists, scan it for cool stuff
347
348 # 1. shared object, others are of no interest to us
349 my $so = "$auto$base.$Config{dlext}";
350 if (my $src = $lib->{$so}) {
351 $so = "lib/$so";
352 push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src];
353 }
354
355 # 2. autoloader/autosplit
356 my $ix = "${auto}autosplit.ix";
357 if (my $src = $lib->{$ix}) {
358 $ix = "lib/$ix";
359 push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src];
360
361 open my $fh, "<:perlio", $src
362 or croak "$src: $!";
363
364 my $package;
365
366 while (<$fh>) {
367 if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) {
368 my $al = "auto/$package/$1.al";
369 my $src = $lib->{$al}
370 or croak "$al: autoload file not found, but should be there.";
371
372 $al = "lib/$al";
373 push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src];
374
375 } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) {
376 ($package = $1) =~ s/::/\//g;
377 } elsif (/^\s*(?:#|1?\s*;?\s*$)/) {
378 # nop
379 } else {
380 warn "WARNING: $src: unparsable line, please report: $_";
381 }
382 }
383 }
384
385 skip:
386 }
387 }
388
389 if (exists $self->{extra_deps}{$_}) {
390 # we require it again, because many extra dependencies require the main module to be loaded
391 $self->add_eval ("require q\x00$_\x00");
392
393 exists $lib->{$_} and $self->add_require ($_)
394 for @{ $self->{extra_deps}{$_} };
395 }
396
397 \@info
398 };
399 }
400 }
401
402 sub _trace {
403 my ($self, $file, $eval) = @_;
404
405 $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n";
406 }
407
408 sub _trace_flush {
409 my ($self) = @_;
410
411 # ->_add might add additional files to trace
412 while (exists $self->{trace_begin} or exists $self->{trace_check}) {
413 my $tmpdir = newdir File::Temp;
414 my $dir = $tmpdir->dirname;
415
416 open my $fh, ">:perlio", "$dir/eval"
417 or croak "$dir/eval: $!";
418 syswrite $fh,
419 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n"
420 . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n"
421 . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n"
422 . "CHECK {\n"
423 . 'open STDOUT, ">:raw", "out" or die "out: $!";'
424 . 'print join "\x00", values %INC;'
425 . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl
426 . "}\n"
427 . (delete $self->{trace_check});
428 close $fh;
429
430 system _perl_path, "-c", "$dir/eval"
431 and croak "trace failure, check trace process output - caught";
432
433 my @inc = split /\x00/, do {
434 open my $fh, "<:perlio", "$dir/out"
435 or croak "$dir/out: $!";
436 local $/;
437 scalar readline $fh
438 };
439
440 my $pfxmatch = $self->{pfxmatch};
441
442 # remove the library directory prefix, hope for the best
443 s/$pfxmatch//
444 or croak "$_: file outside any library directory"
445 for @inc;
446
447 $self->_add (\@inc);
448 }
449 }
450
451 =item $extractor->add_mod ($module[, $module...])
452
453 Adds the given module(s) to the file set - the module name must be specified
454 as in C<use>, i.e. with C<::> as separators and without F<.pm>.
455
456 The program will be loaded with the default import list, any dependent
457 files, such as the shared object implementing xs functions, or autoload
458 files, will also be added.
459
460 If you want to use a different import list (for those rare modules wghere
461 import lists trigger different backend modules to be loaded for example),
462 you can use C<add_eval> instead:
463
464 $extractor->add_eval ("use Module qw(a b c)");
465
466 Example: add F<Coro.pm> and F<AnyEvent/AIO.pm>, and all relevant files
467 from the distribution they are part of.
468
469 $extractor->add_mod ("Coro", "AnyEvent::AIO");
470
471 =cut
472
473 sub add_mod {
474 my $self = shift;
475
476 for (@_) {
477 my $pkg = "libextractor" . ++$self->{count};
478 $self->_trace ("use $_", "{ package $pkg; use $_ }")
479 unless $self->{add_mod}{$_}++;
480 }
481 }
482
483 =item $extractor->add_require ($name[, $name...])
484
485 Works like C<add_mod>, but uses C<require $name> to load the module, i.e.
486 the name must be a filename.
487
488 Example: load Coro and AnyEvent::AIO, but using C<add_require> instead of C<add_mod>.
489
490 $extractor->add_require ("Coro.pm", "AnyEvent/AIO.pm");
491
492 =cut
493
494 sub add_require {
495 my $self = shift;
496
497 for (@_) {
498 $self->add_eval ("require q\x00$_\x00")
499 unless $self->{add_require}{$_}++;
500 }
501 }
502
503 =item $extractor->add_bin ($name[, $name...])
504
505 Adds the given (perl) program(s) to the file set, that is, a program
506 installed by some perl module, written in perl (an example would be the
507 L<perl-libextract> program that is part of the C<Perl::LibExtractor>
508 distribution).
509
510 Example: add the deliantra client program installed by the
511 L<Deliantra::Client> module and put it under F<bin/deliantra>.
512
513 $extractor->add_bin ("deliantra");
514
515 =cut
516
517 sub add_bin {
518 my $self = shift;
519
520 exe:
521 for my $exe (@_) {
522 for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) {
523 if (open my $fh, "<:perlio", "$dir/$exe") {
524 if (-f $fh) {
525 my $file = do { local $/; readline $fh };
526
527 $self->_trace_flush if exists $self->{trace_check};
528 $self->{trace_check} = $file;
529
530 $self->{set}{"bin/$exe"} = ["$dir/$exe"];
531 next exe;
532 }
533 }
534 }
535
536 croak "add_bin ($exe): executable not found";
537 }
538 }
539
540 =item $extractor->add_eval ($string)
541
542 Evaluates the string as perl code and adds all modules that are loaded
543 by it. For example, this would add L<AnyEvent> and the default backend
544 implementation module and event loop module:
545
546 $extractor->add_eval ("use AnyEvent; AnyEvent::detect");
547
548 Each code snippet will be executed in its own package and under C<use
549 strict>.
550
551 =cut
552
553 sub add_eval {
554 my ($self, $eval) = @_;
555
556 my $pkg = "libextractor" . ++$self->{count};
557 $eval =~ s/\x00/\x00."\\x00".q\x00/g;
558 $self->_trace ($eval,
559 "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8
560 . "eval q\x00package $pkg; BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n"
561 );
562 }
563
564 =back
565
566 =head2 OTHER METHODS FOR ADDING FILES
567
568 The following methods add commonly used files that are either not covered
569 by other methods or add commonly-used dependencies.
570
571 =over 4
572
573 =item $extractor->add_perl
574
575 Adds the perl binary itself to the file set, including the libperl dll, if
576 needed.
577
578 For example, on UNIX systems, this usually adds a F<exe/perl> and possibly
579 some F<dll/libperl.so.XXX>.
580
581 =cut
582
583 sub add_perl {
584 my ($self) = @_;
585
586 $self->{set}{"exe/perl$Config{_exe}"} = [_perl_path];
587
588 # on debian, we have the special case of a perl binary linked against
589 # a static libperl.a (which is not available), but the Config says to use
590 # a shared library, which is in the wrong directory, too (which breaks
591 # every other perl installation on the system - they are so stupid).
592
593 # that means we can't find the libperl.so, because dbeian actively breaks
594 # their perl install, and we don't need it. we work around this by silently
595 # not including the libperl if we cannot find it.
596
597 if ($Config{useshrplib} eq "true") {
598 my ($libperl, $libpath);
599
600 if ($^O eq "cygwin") {
601 $libperl = $Config{libperl};
602 $libpath = "$Config{binexp}/$libperl";
603 } elsif ($^O eq "MSWin32") {
604 ($libperl = $Config{libperl}) =~ s/\Q$Config{_a}\E$/.$Config{so}/;
605 $libpath = "$Config{binexp}/$libperl";
606 } else {
607 $libperl = $Config{libperl};
608 $libpath = $self->{lib}{"CORE/$libperl"};
609 }
610
611 $self->{set}{"dll/$libperl"} = [$libpath]
612 if length $libpath && -e $libpath;
613 }
614 }
615
616 =item $extractor->add_core_support
617
618 Try to add modules and files needed to support commonly-used builtin
619 language features. For example to open a scalar for I/O you need the
620 L<PerlIO::scalar> module:
621
622 open $fh, "<", \$scalar
623
624 A number of regex and string features (e.g. C<ucfirst>) need some unicore
625 files, e.g.:
626
627 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i';
628
629 This call adds these files (simply by executing code similar to the above
630 code fragments).
631
632 Notable things that are missing are other PerlIO layers, such as
633 L<PerlIO::encoding>, and named character and character class matches.
634
635 =cut
636
637 sub add_core_support {
638 my ($self) = @_;
639
640 $self->add_eval ('my $v; open my $fh, "<", \$v');
641 $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i');
642 $self->add_eval ('split " ", chr 1234'); # usually covered by the regex above
643 $self->add_eval ('/\x{1234}(?<a>)\g{a}/') if $] >= 5.010; # usually covered by the regex above
644 }
645
646 =item $extractor->add_unicore
647
648 Adds (hopefully) all files from the unicore database that will ever be
649 needed.
650
651 If you are not sure which unicode character classes and similar unicore
652 databases you need, and you do not care about an extra one thousand(!)
653 files comprising 4MB of data, then you can just call this method, which
654 adds basically all files from perl's unicode database.
655
656 Note that C<add_core_support> also adds some unicore files, but it's not a
657 subset of C<add_unicore> - the former adds all files neccessary to support
658 core builtins (which includes some unicore files and other things), while
659 the latter adds all unicore files (but nothing else).
660
661 When in doubt, use both.
662
663 =cut
664
665 sub add_unicore {
666 my ($self) = @_;
667
668 $self->_add ([grep m%^unicore/.*\.pl$%, keys %{ $self->{lib} }]);
669 }
670
671 =item $extractor->add_core
672
673 This adds all files from the perl core distribution, that is, all library
674 files that come with perl.
675
676 This is a superset of C<add_core_support> and C<add_unicore>.
677
678 This is quite a lot, but on the plus side, you can be sure nothing is
679 missing.
680
681 This requires a full perl installation - Debian GNU/Linux doesn't package
682 the full perl library, so this function will not work there.
683
684 =cut
685
686 sub add_core {
687 my ($self) = @_;
688
689 my $lib = $self->{lib};
690
691 for (@{
692 $self->_read_packlist (".packlist")
693 }) {
694 $self->{set}{$_} ||= [
695 "lib/"
696 . ($lib->{$_} or croak "$_: unable to locate file in perl library")
697 ];
698 }
699 }
700
701 =back
702
703 =head2 GLOB-BASED ADDING AND FILTERING
704
705 These methods add or manipulate files by using glob-based patterns.
706
707 These glob patterns work similarly to glob patterns in the shell:
708
709 =over 4
710
711 =item /
712
713 A F</> at the start of the pattern interprets the pattern as a file
714 path inside the file set, almost the same as in the shell. For example,
715 F</bin/perl*> would match all files whose names starting with F<perl>
716 inside the F<bin> directory in the set.
717
718 If the F</> is missing, then the pattern is interpreted as a module name
719 (a F<.pm> file). For example, F<Coro> matches the file F<lib/Coro.pm> ,
720 while F<Coro::*> would match F<lib/Coro/*.pm>.
721
722 =item *
723
724 A single star matches anything inside a single directory component. For
725 example, F</lib/Coro/*.pm> would match all F<.pm> files inside the
726 F<lib/Coro/> directory, but not any files deeper in the hierarchy.
727
728 Another way to look at it is that a single star matches anything but a
729 slash (F</>).
730
731 =item **
732
733 A double star matches any number of characters in the path, including F</>.
734
735 For example, F<AnyEvent::**> would match all modules whose names start
736 with C<AnyEvent::>, no matter how deep in the hierarchy they are.
737
738 =back
739
740 =cut
741
742 sub _extglob2re {
743 for (quotemeta $_[1]) {
744 s/\\\*\\\*/.*/g;
745 s/\\\*/[^\/]*/g;
746 s/\\\?/[^\/]/g;
747
748 unless (s%^\\/%%) {
749 s%\\:\\:%/%g;
750 $_ = "lib/$_\\.pm";
751 }
752
753 $_ .= '$';
754 s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end
755
756 return qr<^$_>s
757 }
758 }
759
760 =over 4
761
762 =item $extractor->add_glob ($modglob[, $modglob...])
763
764 Adds all files from the perl library that match the given glob pattern.
765
766 For example, you could implement C<add_unicore> yourself like this:
767
768 $extractor->add_glob ("/unicore/**.pl");
769
770 =cut
771
772 sub add_glob {
773 my $self = shift;
774
775 for (@_) {
776 my $pat = $self->_extglob2re ($_);
777 $self->_add ([grep /$pat/, keys %{ $self->{lib} }]);
778 }
779 }
780
781 =item $extractor->filter ($pattern[, $pattern...])
782
783 Applies a series of include/exclude filters. Each filter must start with
784 either C<+> or C<->, to designate the pattern as I<include> or I<exclude>
785 pattern. The rest of the pattern is a normal glob pattern.
786
787 An exclude pattern (C<->) instantly removes all matching files from
788 the set. An include pattern (C<+>) protects matching files from later
789 removals.
790
791 That is, if you have an include pattern then all files that were matched
792 by it will be included in the set, regardless of any further exclude
793 patterns matching the same files.
794
795 Likewise, any file excluded by a pattern will not be included in the set,
796 even if matched by later include patterns.
797
798 Any files not matched by any expression will simply stay in the set.
799
800 For example, to remove most of the useless autoload functions by the POSIX
801 module (they either do the same thing as a builtin or always raise an
802 error), you would use this:
803
804 $extractor->filter ("-/lib/auto/POSIX/*.al");
805
806 This does not remove all autoload files, only the ones not defined by a
807 subclass (e.g. it leaves C<POSIX::SigRt::xxx> alone).
808
809 =cut
810
811 sub filter {
812 my ($self, @patterns) = @_;
813
814 $self->_trace_flush;
815
816 my $set = $self->{set};
817 my %include;
818
819 for my $pat (@patterns) {
820 $pat =~ s/^([+\-])//
821 or croak "$_: not a valid filter pattern (missing + or - prefix)";
822 my $inc = $1 eq "+";
823 $pat = $self->_extglob2re ($pat);
824
825 my @match = grep /$pat/, keys %$set;
826
827 if ($inc) {
828 @include{@match} = delete @$set{@match};
829 } else {
830 delete @$set{@{ $_->[I_DEP] }} # remove dependents
831 for delete @$set{@match};
832 }
833 }
834
835 my @include = keys %include;
836 @$set{@include} = delete @include{@include};
837 }
838
839 =item $extractor->runtime_only
840
841 This removes all files that are not needed at runtime, such as static
842 archives, header and other files needed only for compilation of modules,
843 and pod and html files (which are unlikely to be needed at runtime).
844
845 This is quite useful when you want to have only files actually needed to
846 execute a program.
847
848 =cut
849
850 sub runtime_only {
851 my ($self) = @_;
852
853 $self->_trace_flush;
854
855 my $set = $self->{set};
856
857 # delete all static libraries, also windows stuff
858 delete @$set{ grep m%^lib/auto/(?:.+/)?([^\/]+)/\1(?:\Q$Config{_a}\E|\.pdb|\.exp)$%s, keys %$set };
859
860 # delete all extralibs.ld and extralibs.all (no clue what the latter is for)
861 delete @$set{ grep m%^lib/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set };
862
863 # delete all .pod, .h, .html files (hopefully none of them are used at runtime)
864 delete @$set{ grep m%^lib/.*\.(?:pod|h|html)$%s, keys %$set };
865
866 # delete unneeded unicore files
867 delete @$set{ grep m%^lib/unicore/(?:mktables(?:\.lst)?|.*\.txt)$%s, keys %$set };
868 }
869
870 =back
871
872 =head2 RESULT SET
873
874 =over 4
875
876 =item $set = $extractor->set
877
878 Returns a hash reference that represents the result set. The hash is the
879 actual internal storage hash and can only be modified as described below.
880
881 Each key in the hash is the path inside the set, without a leading slash,
882 e.g.:
883
884 bin/perl
885 lib/unicore/lib/Blk/Superscr.pl
886 lib/AnyEvent/Impl/EV.pm
887
888 The value is an array reference with mostly unspecified contents, except
889 the first element, which is the file system path where the actual file can
890 be found.
891
892 This code snippet lists all files inside the set:
893
894 print "$_\n"
895 for sort keys %{ $extractor->set });
896
897 This code fragment prints C<< filesystem_path => set_path >> pairs for all
898 files in the set:
899
900 my $set = $extractor->set;
901 while (my ($set,$fspath) = each %$set) {
902 print "$fspath => $set\n";
903 }
904
905 You can implement your own filtering by asking for the result set with
906 C<< $extractor->set >>, and then deleting keys from the referenced hash
907 - since you can ask for the result set at any time you can add things,
908 filter them out this way, and add additional things.
909
910 =back
911
912 =cut
913
914 sub set {
915 $_[0]->_trace_flush;
916 $_[0]{set}
917 }
918
919 =head1 EXAMPLE
920
921 To package he deliantra client (L<Deliantra::Client>), finding all
922 (perl) files needed to run it is a first step. This can be done by using
923 something like the following code snippet:
924
925 my $ex = new Perl::LibExtractor;
926
927 $ex->add_perl;
928 $ex->add_core_support;
929 $ex->add_bin ("deliantra");
930 $ex->add_mod ("AnyEvent::Impl::EV");
931 $ex->add_mod ("AnyEvent::Impl::Perl");
932 $ex->add_mod ("Urlader");
933 $ex->filter ("-/*/auto/POSIX/**.al");
934 $ex->runtime_only;
935
936 First it sets the perl library directory to F<pm> and F<.> (the latter
937 to work around some AutoLoader bugs), so perl uses only the perl library
938 files that came with the binary package.
939
940 Then it sets some environment variable to override the system default
941 (which might be incompatible).
942
943 Then it runs the client itself, using C<require>. Since C<require> only
944 looks in the perl library directory this is the reaosn why the scripts
945 were put there (of course, since F<.> is also included it doesn't matter,
946 but I refuse to yield to bugs).
947
948 Finally it exits with a clean status to signal "ok" to Urlader.
949
950 Back to the original C<Perl::LibExtractor> script: after initialising a
951 new set, the script simply adds the F<perl> interpreter and core support
952 files (just in case, not all are needed, but some are, and I am too lazy
953 to find out which ones exactly).
954
955 Then it adds the deliantra executable itself, which in turn adds most of
956 the required modules. After that, the AnyEvent implementation modules are
957 added because these dependencies are not picked up automatically.
958
959 The L<Urlader> module is added because the client itself does not depend
960 on it at all, but the wrapper does.
961
962 At this point, all required files are present, and it's time to slim
963 down: most of the ueseless POSIX autoloaded functions are removed,
964 not because they are so big, but because creating files is a costly
965 operation in itself, so even small fiels have considerable overhead when
966 unpacking. Then files not required for running the client are removed.
967
968 And that concludes it, the set is now ready.
969
970 =head1 SEE ALSO
971
972 The utility program that comes with this module: L<perl-libextract>.
973
974 L<App::Staticperl>, L<Urlader>, L<Perl::Squish>.
975
976 =head1 LICENSE
977
978 This software package is licensed under the GPL version 3 or any later
979 version, see COPYING for details.
980
981 This license does not, of course, apply to any output generated by this
982 software.
983
984 =head1 AUTHOR
985
986 Marc Lehmann <schmorp@schmorp.de>
987 http://home.schmorp.de/
988
989 =cut
990
991 1;
992