--- cvsroot/App-Staticperl/mkbundle 2011/07/14 20:34:51 1.31 +++ cvsroot/App-Staticperl/mkbundle 2023/08/24 10:35:11 1.46 @@ -5,6 +5,8 @@ our $VERBOSE = 1; our $STRIP = "pod"; # none, pod or ppi +our $COMPRESS = "lzf"; +our $KEEPNL = 0; our $UNISTRIP = 1; # always on, try to strip unicore swash data our $PERL = 0; our $APP; @@ -14,11 +16,15 @@ our $IGNORE_ENV = 0; our $ALLOW_DYNAMIC = 0; our $HAVE_DYNAMIC; # maybe useful? +our $EXTRA_CFLAGS = ""; +our $EXTRA_LDFLAGS = ""; +our $EXTRA_LIBS = ""; +# TODO: at least with lzf, OPTIMIZE_SIZE sesm to be a win? (also, does not respect KEEPNL) our $OPTIMISE_SIZE = 0; # optimise for raw file size instead of for compression? our $CACHE; -our $CACHEVER = 1; # do not change unless you know what you are doing +our $CACHEVER = 2; # do not change unless you know what you are doing my $PREFIX = "bundle"; my $PACKAGE = "static"; @@ -78,7 +84,7 @@ syswrite $W_TRACER, "-\n$dir\n$_[1]\n"; - open my $fh, "<:perlio", "$dir/$_[1]" + open my $fh, "<:raw:perlio", "$dir/$_[1]" or warn "ERROR: $dir/$_[1]: $!\n"; $fh @@ -164,7 +170,7 @@ if (length $CACHE and 2048 <= length $src and defined $variant) { my $file = "$CACHE/" . Digest::MD5::md5_hex "$CACHEVER\x00$variant\x00$src"; - if (open my $fh, "<:perlio", $file) { + if (open my $fh, "<:raw:perlio", $file) { print "using cache for $file\n" if $VERBOSE >= 7; @@ -177,7 +183,7 @@ print "creating cache entry $file\n" if $VERBOSE >= 8; - if (open my $fh, ">:perlio", "$file~") { + if (open my $fh, ">:raw:perlio", "$file~") { if ((syswrite $fh, $src) == length $src) { close $fh; rename "$file~", $file; @@ -355,30 +361,36 @@ sub parse_argv { GetOptions - "perl" => \$PERL, - "app=s" => \$APP, + "perl" => \$PERL, + "app=s" => \$APP, - "verbose|v" => sub { ++$VERBOSE }, - "quiet|q" => sub { --$VERBOSE }, + "verbose|v" => sub { ++$VERBOSE }, + "quiet|q" => sub { --$VERBOSE }, - "strip=s" => \$STRIP, - "cache=s" => \$CACHE, # internal option - "eval|e=s" => sub { trace_eval $_[1] }, - "use|M=s" => sub { trace_module $_[1] }, - "boot=s" => sub { cmd_boot $_[1] }, - "add=s" => sub { cmd_add $_[1], 0 }, - "addbin=s" => sub { cmd_add $_[1], 1 }, - "incglob=s" => sub { cmd_incglob $_[1] }, - "include|i=s" => sub { cmd_include $_[1], 1 }, - "exclude|x=s" => sub { cmd_include $_[1], 0 }, - "usepacklists!" => \$PACKLIST, - - "static!" => \$STATIC, - "staticlib=s" => sub { cmd_staticlib $_[1] }, - "allow-dynamic!"=> \$ALLOW_DYNAMIC, - "ignore-env" => \$IGNORE_ENV, + "strip=s" => \$STRIP, + "keepnl" => \$KEEPNL, + "compress=s" => \$COMPRESS, + "cache=s" => \$CACHE, # internal option + "eval|e=s" => sub { trace_eval $_[1] }, + "use|M=s" => sub { trace_module $_[1] }, + "boot=s" => sub { cmd_boot $_[1] }, + "add=s" => sub { cmd_add $_[1], 0 }, + "addbin=s" => sub { cmd_add $_[1], 1 }, + "incglob=s" => sub { cmd_incglob $_[1] }, + "include|i=s" => sub { cmd_include $_[1], 1 }, + "exclude|x=s" => sub { cmd_include $_[1], 0 }, + "usepacklists!" => \$PACKLIST, + + "static!" => \$STATIC, + "staticlib=s" => sub { cmd_staticlib $_[1] }, + "allow-dynamic!" => \$ALLOW_DYNAMIC, + "ignore-env" => \$IGNORE_ENV, + + "extra-cflags=s" => \$EXTRA_CFLAGS, + "extra-ldflags=s" => \$EXTRA_LDFLAGS, + "extra-libs=s" => \$EXTRA_LIBS, - "<>" => sub { cmd_file $_[0] }, + "<>" => sub { cmd_file $_[0] }, or exit 1; } @@ -389,6 +401,9 @@ die "cannot specify both --app and --perl\n" if $PERL and defined $APP; +die "--compress must be either none or lzf\n" + unless $COMPRESS =~ /^(?:none|lzf)\z/; + # required for @INC loading, unfortunately trace_module "PerlIO::scalar"; @@ -545,6 +560,13 @@ print "processing bundle files (try more -v power if you get bored waiting here)...\n" if $VERBOSE >= 1; +my $compress = sub { shift }; + +if ($COMPRESS eq "lzf") { + require Compress::LZF; + $compress = sub { Compress::LZF::compress_best (shift) }; +} + my $data; my @index; my @order = sort { @@ -565,7 +587,7 @@ my $src = ref $path ? $$path : do { - open my $pm, "<", $path + open my $pm, "<:raw:perlio", $path or die "$path: $!"; local $/; @@ -584,7 +606,7 @@ } } - $src = cache +($STRIP eq "ppi" ? "$UNISTRIP,$OPTIMISE_SIZE" : undef), $src, sub { + $src = cache "$STRIP,$UNISTRIP,$KEEPNL,$OPTIMISE_SIZE,$COMPRESS", $src, sub { if ($UNISTRIP && $pm =~ /^unicore\/.*\.pl$/) { print "applying unicore stripping $pm\n" if $VERBOSE >= 6; @@ -620,9 +642,21 @@ if ($STRIP =~ /ppi/i) { require PPI; + # PPI (quite correctly) treats pod in __DATA__ as data, not pod, so + # we don't have to work around Opcode.pm, as with Pod::Strip + if (my $ppi = PPI::Document->new (\$src)) { - $ppi->prune ("PPI::Token::Comment"); - $ppi->prune ("PPI::Token::Pod"); + for my $node ( + @{ $ppi->find (PPI::Token::Comment::) }, + @{ $ppi->find (PPI::Token::Pod::) } + ) { + if ($KEEPNL) { + $node->{content} =~ s/[^\n]//g; + $node->insert_after (PPI::Token::Whitespace->new ("\n")) if length $node->{content}; + } + + $node->delete; + } # prune END stuff for (my $last = $ppi->last_element; $last; ) { @@ -635,6 +669,8 @@ last; } elsif ($last->isa (PPI::Token::Pod::)) { $last->delete; + } elsif ($last->isa (PPI::Token::Comment::)) { + $last->delete; } else { last; } @@ -650,24 +686,42 @@ if (!$prev || !$next) { $ws->delete; } else { - if ( - $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float - or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/ - or $prev->isa (PPI::Token::Structure::) - or ($OPTIMISE_SIZE && - ($prev->isa (PPI::Token::Word::) - && (PPI::Token::Symbol:: eq ref $next - || $next->isa (PPI::Structure::Block::) - || $next->isa (PPI::Structure::List::) - || $next->isa (PPI::Structure::Condition::))) - ) + if ($next->isa (PPI::Token::Whitespace::)) { + # push this whitespace data into the next node + $next->{content} = "$ws->{content}$next->{content}"; + $ws->{content} = ""; + } elsif ( + ( + $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float + or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/ + or $prev->isa (PPI::Token::Structure::) + or ($OPTIMISE_SIZE && + ($prev->isa (PPI::Token::Word::) + && (PPI::Token::Symbol:: eq ref $next + || $next->isa (PPI::Structure::Block::) + || $next->isa (PPI::Structure::List::) + || $next->isa (PPI::Structure::Condition::))) + ) + ) + # perl has some idiotic warning about nonexisting operators (Reverse %s operator) + # also catch "= ~" + && !( + $prev->isa (PPI::Token::Operator::) && $prev->{content} eq "=" + && $next->isa (PPI::Token::Operator::) && $next->{content} =~ /[+\-\~]/ + ) ) { - $ws->delete; - } elsif ($prev->isa (PPI::Token::Whitespace::)) { - $ws->{content} = ' '; - $prev->delete; + if ($KEEPNL) { + $ws->{content} =~ s/[^\n]//g; + } else { + $ws->{content} = ''; + } } else { - $ws->{content} = ' '; + if ($KEEPNL) { + $ws->{content} =~ s/[^\n]//g; + $ws->{content} ||= ' '; # keep at least one space + } else { + $ws->{content} = ' '; + } } } } @@ -741,6 +795,8 @@ # } } + $src = $compress->($src); + print "adding $pm (original size $size, stored size ", length $src, ")\n" if $VERBOSE >= 2; @@ -802,6 +858,116 @@ EOF ############################################################################# +# lzf decompressor + +if ($COMPRESS eq "lzf") { + print $fh <<'EOF'; +/* stripped down/perlified version of lzf_d.c from liblzf-3.7 */ + +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif + +static unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len) +{ + U8 const *ip = (const U8 *)in_data; + U8 *op = (U8 *)out_data; + U8 const *const in_end = ip + in_len; + U8 *const out_end = op + out_len; + + do + { + unsigned int ctrl = *ip++; + + if (ctrl < (1 << 5)) /* literal run */ + { + ctrl++; + + if (op + ctrl > out_end) + return 0; + +#ifdef lzf_movsb + lzf_movsb (op, ip, ctrl); +#else + while (ctrl--) + *op++ = *ip++; +#endif + } + else /* back reference */ + { + unsigned int len = ctrl >> 5; + + U8 *ref = op - ((ctrl & 0x1f) << 8) - 1; + + if (len == 7) + len += *ip++; + + ref -= *ip++; + + if (op + len + 2 > out_end) + return 0; + + if (ref < (U8 *)out_data) + return 0; + + len += 2; +#ifdef lzf_movsb + lzf_movsb (op, ref, len); +#else + do + *op++ = *ref++; + while (--len); +#endif + } + } + while (ip < in_end); + + return op - (U8 *)out_data; +} + +static SV * +static_to_sv (const char *ptr, STRLEN len) +{ + SV *res; + const U8 *p = (const U8 *)ptr; + + if (len == 0) /* empty */ + res = newSVpvn ("", 0); + else if (*p == 0) /* not compressed */ + res = newSVpvn (p + 1, len - 1); + else /* lzf compressed, with UTF-8-encoded original size in front */ + { + STRLEN ulenlen; + UV ulen = utf8n_to_uvchr (p, len, &ulenlen, 0); + + p += ulenlen; + len -= ulenlen; + + res = NEWSV (0, ulen); + sv_upgrade (res, SVt_PV); + SvPOK_only (res); + lzf_decompress (p, len, SvPVX (res), ulen); + SvCUR_set (res, ulen); + } + + return res; +} + +EOF +} else { + print $fh <= 1; - $ccopts = "$Config{ccflags} $Config{optimize} $Config{cppflags} -I\"$Config{archlibexp}/CORE\""; + $ccopts = "$Config{ccflags} $Config{optimize} $Config{cppflags} -I$Config{archlibexp}/CORE $EXTRA_CFLAGS"; $ccopts =~ s/([\(\)])/\\$1/g; open my $fh, ">$PREFIX.ccopts" @@ -1191,10 +1369,10 @@ $ldopts = $STATIC ? "-static " : ""; - $ldopts .= "$Config{ccdlflags} $Config{ldflags} @libs $Config{archlibexp}/CORE/$Config{libperl} $Config{perllibs}"; + $ldopts .= "$Config{ccdlflags} $Config{ldflags} $EXTRA_LDFLAGS @libs $Config{archlibexp}/CORE/$Config{libperl} $Config{perllibs} $EXTRA_LIBS"; my %seen; - $ldopts .= " $_" for reverse grep !$seen{$_}++, reverse ($extralibs =~ /(\S+)/g); + $ldopts .= " $_" for reverse grep !$seen{$_}++, reverse +($extralibs =~ /(\S+)/g); for (@staticlibs) { $ldopts =~ s/(^|\s) (-l\Q$_\E) ($|\s)/$1-Wl,-Bstatic $2 -Wl,-Bdynamic$3/gx; @@ -1223,8 +1401,8 @@ system $build; -# unlink "$PREFIX.$_" -# for qw(ccopts ldopts c h); + unlink "$PREFIX.$_" + for qw(ccopts ldopts c h); print "\n" if $VERBOSE >= 1;