=head1 NAME Perl::LibExtractor - determine perl library subsets for building distributions =head1 SYNOPSIS use Perl::LibExtractor; =head1 DESCRIPTION The purpose of this module is to determine subsets of your perl library, that is, a set of files needed to satisfy certain dependencies (e.g. of a program). The goal is to extract a part of your perl installation including dependencies. A typical use case for this module would be to find out which files are needed to be build a L distribution, to link into an L binary, or to pack with L, to create stand-alone distributions tailormade to run your app. =head1 METHODS To use this module, first call the C-constructor and then as many other methods as you want, to generate a set of files. Then query the set of files and do whatever you want with them. The command-line utility F can be a convenient alternative to using this module directly, and offers a few extra options, such as to copy out the files into a new directory, strip them and/or manipulate them in other ways. =over 4 =cut package Perl::LibExtractor; our $VERSION = '0.1'; use Config; use File::Temp (); use common::sense; sub I_SRC () { 0 } sub I_DEP () { 1 } sub croak($) { require Carp; Carp::croak "(Perl::LibExtractor) $_[0]"; } =item $extractor = new Perl::LibExtractor [key => value...] Creates a new extractor object. Each extractor object stores some configuration options and a subset of files that can be queried at any time,. The following key-value pairs exist, with default values as specified. =over 4 =item exedir => "bin" The prefix to use for the suggested target path for perl executables. Defaults to F. =item libdir => "lib" The prefix to use for the suggested target path of perl library files (F<.pm>, F<.pl>, dynamic objects, autoloader index and files etc.). Defaults to F. =item bindir => "bin" The prefix to use for the suggested target path for (non-perl) executables. Defaults to F. =item dlldir => "bin" The prefix to use for the suggested target path of any shared libraries. Defaults to F. =item inc => \@INC without "." An arrayref with paths to perl library directories. The default is C<\@INC>, with F<.> removed. To prepend custom dirs just do this: inc => ["mydir", @INC], =item use_packlist => 1 Enable (if true) or disable the use of C<.packlist> files. If enabled, then each time a module is included, the complete distribution that contains it is included (and traced) as well. See L, below. =back =cut sub new { my ($class, %kv) = @_; my $self = bless { exedir => "bin", libdir => "lib", bindir => "bin", dlldir => "bin", inc => [grep $_ ne ".", @INC], use_packlist => 1, %kv, set => {}, }, $class; my %inc_seen; my @inc = grep !$inc_seen{$_}++, @{ $self->{inc} }; $self->{inc} = \@inc; $self->_set_inc; $self } sub _set_inc { my ($self) = @_; my $matchprefix = join "|", map "\Q$_", @{ $self->{inc }}; $matchprefix = qr<^(?:$matchprefix)/>; my %lib; my @packlists; # find all files in all libdirs, earlier ones overwrite later ones my @scan = map [$_, ""], @{ $self->{inc} }; while (@scan) { my ($root, $dir) = @{ pop @scan }; my $pfx = length $dir ? "$dir/" : ""; for (do { opendir my $fh, "$root/$dir" or croak "$root/$dir: $!"; grep !/^\.\.?$/, readdir $fh }) { if (-d "$root/$dir/$_/.") { $lib{"$pfx$_/"} = "$root/$pfx$_"; push @scan, [$root, "$pfx$_"]; } elsif ($_ eq ".packlist" && $pfx =~ m%^auto/%) { push @packlists, [$root, $pfx]; } elsif (/\.bs$/ && $pfx =~ m%^auto/% && !-s "$root/$dir/$_") { # skip empty .bs files # } elsif (/\.(?:pod|h|html)$/) { # # not interested in those } else { #push @files, $_; $lib{"$pfx$_"} = "$root/$pfx$_"; } } #$lib{"$_[1]/"} = [\@dirs, \@files]; # won't work nice with overwrite } my %packlist; # need to go forward here for (@packlists) { my ($root, $auto) = @$_; my @packlist; open my $fh, "<:perlio", "$root/$auto/.packlist" or die "$root/$auto/.packlist: $!"; $root = qr<^\Q$root/>; while (<$fh>) { chomp; s/ .*$//; # newer-style .packlists might contain key=value pairs s%/\./%/%g; # yeah, these too s/$root// or next; exists $lib{$_} or next; push @packlist, $_; $packlist{$_} = \@packlist; } } $self->{lib} = \%lib; $self->{packlist} = \%packlist; $self->{matchprefix} = $matchprefix; } sub _add { my ($self, $add) = @_; my $lib = $self->{lib}; my $path; for (@$add) { $path = "$self->{libdir}/$_"; $self->{set}{$path} ||= do { my @info; $info[I_SRC] = $lib->{$_} or croak "$_: unable to locate file in perl library"; if ($self->{use_packlist} && exists $self->{packlist}{$_}) { $self->{set}{"$self->{libdir}/$_"} ||= [$self->{lib}{$_} or die] for @{ $self->{packlist}{$_} }; # for (grep /\.pm$/, @{ $self->{packlist}{$_} }) { # s/\.pm$//; # s%/%::%g; # my $pkg = "libextractor" . ++$self->{count}; # $self->add_eval ("{ package $pkg; eval 'use $_' }") # unless $self->{_add_do}{$_}++; # } # # $self->{_add_do}{$_}++ or $self->add_eval ("do q\x00$_\x00") # for grep /\.pl$/, @{ $self->{packlist}{$_} }; } elsif (/^(.*)\.pm$/) { (my $auto = "auto/$1/") =~ s%::%/%g; $auto =~ m%/([^/]+)/$% or die; my $base = $1; if (exists $lib->{$auto}) { # auto dir exists, scan it for cool stuff # 1. shared object, others are of no interest to us my $so = "$auto$base.$Config{dlext}"; if (my $src = $lib->{$so}) { $so = "$self->{libdir}/$so"; push @{ $info[I_DEP] }, $so; $self->{set}{$so} = [$src]; } # 2. autoloader/autosplit my $ix = "${auto}autosplit.ix"; if (my $src = $lib->{$ix}) { $ix = "$self->{libdir}/$ix"; push @{ $info[I_DEP] }, $ix; $self->{set}{$ix} = [$src]; open my $fh, "<:perlio", $src or croak "$src: $!"; my $package; while (<$fh>) { if (/^\s*sub\s+ ([^[:space:];]+) \s* (?:\([^)]*\))? \s*;?\s*$/x) { my $al = "auto/$package/$1.al"; my $src = $lib->{$al} or croak "$al: autoload file not found, but should be there."; $al = "$self->{libdir}/$al"; push @{ $info[I_DEP] }, $al; $self->{set}{$al} = [$src]; } elsif (/^\s*package\s+([^[:space:];]+)\s*;?\s*$/) { ($package = $1) =~ s/::/\//g; } elsif (/^\s*(?:#|1?\s*;?\s*$)/) { # nop } else { warn "WARNING: $src: unparsable line, please report: $_"; } } } skip: } } \@info }; } } sub _trace { my ($self, $file, $eval) = @_; $self->{trace_begin} .= "\n#line \"$file\" 1\n$eval;\n"; } sub _trace_flush { my ($self) = @_; # ->_add might add additional files to trace while (exists $self->{trace_begin} or exists $self->{trace_check}) { my $tmpdir = newdir File::Temp; my $dir = $tmpdir->dirname; open my $fh, ">:perlio", "$dir/eval" or croak "$dir/eval: $!"; syswrite $fh, 'BEGIN { @INC = (' . (join ", ", map "q\x00$_\x00", @{ $self->{inc} }) . ") }\n" . "BEGIN { chdir q\x00$dir\x00 or die q\x00$dir: \$!\x00 }\n" . 'BEGIN { ' . (delete $self->{trace_begin}) . "}\n" . "CHECK {\n" . 'open STDOUT, ">:raw", "out" or die "out: $!";' . 'print join "\x00", values %INC;' . 'open STDERR, ">stderr";' # suppress "syntax OK" message from perl . "}\n" . (delete $self->{trace_check}); close $fh; my $secure_perl_path = $Config{perlpath}; if ($^O ne 'VMS') { $secure_perl_path .= $Config{_exe} unless $secure_perl_path =~ m/$Config{_exe}$/i; } system $secure_perl_path, "-c", "$dir/eval" and croak "trace failure, check trace process output - caught"; my @inc = split /\x00/, do { open my $fh, "<:perlio", "$dir/out" or croak "$dir/out: $!"; local $/; scalar readline $fh }; my $matchprefix = $self->{matchprefix}; # remove the library directory prefix, hope for the best s/$matchprefix// or croak "$_: file outside any library directory" for @inc; $self->_add (\@inc); } } =item $extractor->add_mod ($module[, $module...]) Adds the given module(s) to the file set - the module name must be specified as in C, i.e. with C<::> as separators and without F<.pm>. The program will be loaded with the default import list, any dependent files, such as the shared object implementing xs functions, or autoload files, will also be added. Example: add F and F, and all relevant files from the distribution they are part of. $extractor->add_mod ("Coro", "AnyEvent::AIO"); =cut sub add_mod { my $self = shift; for (@_) { my $pkg = "libextractor" . ++$self->{count}; $self->_trace ("use $_", "{ package $pkg; use $_ }") unless $self->{add_mod}{$_}++; } } =item $extractor->add_exe ($name[, $name...]) Adds the given (perl) program(s) to the file set, that is, a program installed by some perl module, written in perl (an example would be the L program that is part of the C distribution). Example: add the deliantra client installed by the L module. $extractor->add_exe ("deliantra"); =cut sub add_exe { my $self = shift; exe: for my $exe (@_) { for my $dir ($Config{sitebinexp}, $Config{vendorbinexp}, $Config{binexp}) { if (open my $fh, "<:perlio", "$dir/$exe") { my $file = do { local $/; readline $fh }; $self->_trace_flush if exists $self->{trace_check}; $self->{trace_check} = $file; $self->{set}{"$self->{exedir}/$exe"} = ["$dir/$exe"]; next exe; } } croak "add_exe ($exe): executable not found"; } } =item $extractor->add_eval ($string) Evaluates the string and adds all modules that are loaded by it. For example, this would add L and the default backend implementation module and event loop module: $extractor->add_eval ("use AnyEvent; AnyEvent::detect"); =cut sub add_eval { my ($self, $eval) = @_; $eval =~ s/\x00/\x00."\\x00".q\x00/g; $self->_trace ($eval, "local \$^H = \$^H;" # vvvvvvvvvvvvvvvvvvvv = use strict; use utf8 . "eval q\x00BEGIN { \$^H = \$^H | 0x800600 } $eval\x00; die \"\$\@\" if \$\@;\n" ); } =item $extractor->add_perl Adds the perl binary itself to the file set, including the libperl dll, if needed. #TODO# =item $extractor->add_core_support Try to add modules and files needed to support commonly-used builtin language features. For example to open a scalar for I/O you need the L module: open $fh, "<", \$scalar A number of regex and string features (e.g. C) need some unicore files, e.g.: 'my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|$x/i'; This call adds these files (simply by executing code similar to the above code fragments). Notable things that are missing are other PerlIO layers, such as L, and named character and character class matches. =cut sub add_core_support { my ($self) = @_; $self->add_eval ('my $v; open my $fh, "<", \$v'); $self->add_eval ('my $x = chr 1234; "\u$x\U$x\l$x\L$x"; $x =~ /\d|\w|\s|\b|\R|\h|\v|$x/i'); } =item $extractor->add_unicore #TODO =item $extractor->add_glob ($modglob[, $modglob...]) #TODO# =item $extractor->filter ($pattern[, $pattern...]) Applies a series of include/exclude filters. Each filter must start with either C<+> or C<->, to designate the pattern as I or I pattern. The rest of the pattern is an extended glob pattern (see L). Each pattern is instantly applied, and all matching files will be permanently included or excluded, that is, if you have an include pattern then all files that were matched by it will be included in the set, regardless of any further exclude patterns matching the same files. Likewise, any file excluded by a pattern will not be included in the set, even if matches by later include patterns. Any files not matched by any expression will be included, that is, the filter list has an implicit C<+/**> pattern at the end. =cut sub _extglob2re { my $self = shift; for (quotemeta $_[0]) { s/\\\*\\\*/.*/g; s/\\\*/[^\/]*/g; s/\\\?/[^\/]/g; unless (s%^\\/%%) { s%\\:\\:%/%g; $_ = (quotemeta $self->{libdir}) . "/$_\\.pm"; } $_ .= '$'; s/(?: \[\^\/\] | \. ) \*\$$//x; # remove ** at end return qr<^$_>s } } sub filter { my ($self, @patterns) = @_; $self->_trace_flush; my $set = $self->{set}; my %include; for my $pat (@patterns) { $pat =~ s/^([+\-])// or croak "$_: not a valid filter pattern (missing + or - prefix)"; my $inc = $1 eq "+"; $pat = $self->_extglob2re ($pat); my @match = grep /$pat/, keys %$set; if ($inc) { @include{@match} = delete @$set{@match}; } else { delete @$set{@{ $_->[I_DEP] }} # remove dependents for delete @$set{@match}; } } my @include = keys %include; @$set{@include} = delete @include{@include}; } =item $extractor->runtime_only This removes all files that are not needed at runtime, such as static archives, header and other files needed only for compilation of modules, and pod and html files (which are unlikely to be needed at runtime). This is quite useful when you want to have only fiels actually needed to execute a program. =cut sub runtime_only { my ($self) = @_; $self->_trace_flush; my $set = $self->{set}; # delete all static libraries delete @$set{ grep m%^\Q$self->{libdir}\E/auto/(?:.+/)?([^\/]+)/\1\Q$Config{_a}\E$%s, keys %$set }; # delete all extralibs.ld and extralibs.all (no clue what the latter is for) delete @$set{ grep m%^\Q$self->{libdir}\E/auto/.*/extralibs\.(?:ld|all)$%s, keys %$set }; # delete all .pod, .h, .html files (hopefully none of them are used at runtime) delete @$set{ grep m%^\Q$self->{libdir}\E/.*.(?:pod|h|html)$%s, keys %$set }; } =back =head1 ALGORITHMS #TODO =head2 Module/trace-based additions #TODO For example, when using L or L are added, then also all (relevant) files from the L and L distributions will be included. The only exception is perl itself =head2 Glob/path-based modifications #TODO =head1 EXTENDED GLOB PATTERNS #TODO =head1 SEE ALSO The utility program that comes with this module: L. L, L, L. =head1 AUTHOR Marc Lehmann http://home.schmorp.de/ =cut 1;