ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Perl-Strip/Strip.pm
Revision: 1.2
Committed: Sun Jan 9 10:08:29 2011 UTC (15 years, 4 months ago) by root
Branch: MAIN
CVS Tags: rel-1_0
Changes since 1.1: +19 -4 lines
Log Message:
1.0

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::Strip - reduce file size by stripping whitespace, comments, pod etc.
4    
5     =head1 SYNOPSIS
6    
7     use Perl::Strip;
8    
9     =head1 DESCRIPTION
10    
11 root 1.2 This module transforms perl sources into a more compact format. It does
12     this by removing most whitespace, comments, pod, and by some other means.
13    
14     The resulting code looks obfuscated, but perl (and the deparser) don't
15     have any problems with that. Depending on the source file you can expect
16     about 30-60% "compression".
17    
18     The main target for this module is low-diskspace environments, such as
19     L<App::Staticperl>, boot floppy/CDs/flash environments and so on.
20    
21     See also the commandline utility L<perlstrip>.
22    
23 root 1.1 =head1 METHODS
24    
25     The C<Perl::Strip> class is a subclsass of L<PPI::Transform>, and as such
26     inherits all of it's methods, even the ones not documented here.
27    
28     =over 4
29    
30     =cut
31    
32     package Perl::Strip;
33    
34 root 1.2 our $VERSION = '1.0';
35 root 1.1 our $CACHE_VERSION = 1;
36    
37     use common::sense;
38    
39     use PPI;
40    
41     use base PPI::Transform::;
42    
43     =item my $transform = new Perl::Strip key => value...
44    
45     Creates a new Perl::Strip transform object. It supports the following
46     parameters:
47    
48     =over 4
49    
50     =item optimise_size => $bool
51    
52     By default, this module optimises I<compressability>, not raw size. This
53     switch changes that (and makes it slower).
54    
55     =item cache => $path
56    
57     Since this module can take a very long time (minutes for the larger files
58     in the perl distribution), it can utilise a cache directory. The directory
59     will be created if it doesn't exist, and can be deleted at any time.
60    
61     =back
62    
63     =cut
64    
65     # PPI::Transform compatible
66     sub document {
67     my ($self, $doc) = @_;
68    
69 root 1.2 $self->{optimise_size} = 1; # more research is needed
70    
71 root 1.1 # special stripping for unicore/ files
72     if (eval { $doc->child (1)->content =~ /^# .* machine-generated .*mktables / }) {
73    
74     for my $heredoc (@{ $doc->find (PPI::Token::HereDoc::) }) {
75     my $src = join "", $heredoc->heredoc;
76    
77     # special stripping for unicore swashes and properties
78     # much more could be done by going binary
79     for ($src) {
80     s/^([0-9a-fA-F]+)\t([0-9a-fA-F]+)\t/sprintf "%X\t%X", hex $1, hex $2/gem
81     if $self->{optimise_size};
82    
83     # s{
84     # ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
85     # }{
86     # # ww - smaller filesize, UU - compress better
87     # pack "C0UU",
88     # hex $1,
89     # length $2 ? (hex $2) - (hex $1) : 0
90     # }gemx;
91    
92     s/#.*\n/\n/mg;
93     s/\s+\n/\n/mg;
94     }
95    
96     # PPI seems to be mostly undocumented
97     $heredoc->{_heredoc} = [split /$/, $src];
98     }
99     }
100    
101     $doc->prune (PPI::Token::Comment::);
102     $doc->prune (PPI::Token::Pod::);
103    
104     # prune END stuff
105     for (my $last = $doc->last_element; $last; ) {
106     my $prev = $last->previous_token;
107    
108     if ($last->isa (PPI::Token::Whitespace::)) {
109     $last->delete;
110     } elsif ($last->isa (PPI::Statement::End::)) {
111     $last->delete;
112     last;
113     } elsif ($last->isa (PPI::Token::Pod::)) {
114     $last->delete;
115     } else {
116     last;
117     }
118    
119     $last = $prev;
120     }
121    
122     # prune some but not all insignificant whitespace
123     for my $ws (@{ $doc->find (PPI::Token::Whitespace::) }) {
124     my $prev = $ws->previous_token;
125     my $next = $ws->next_token;
126    
127     if (!$prev || !$next) {
128     $ws->delete;
129     } else {
130     if (
131     $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float
132     or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/
133     or $prev->isa (PPI::Token::Structure::)
134     or ($self->{optimise_size} &&
135     ($prev->isa (PPI::Token::Word::)
136     && (PPI::Token::Symbol:: eq ref $next
137     || $next->isa (PPI::Structure::Block::)
138     || $next->isa (PPI::Structure::List::)
139     || $next->isa (PPI::Structure::Condition::)))
140     )
141     ) {
142     $ws->delete;
143     } elsif ($prev->isa (PPI::Token::Whitespace::)) {
144     $ws->{content} = ' ';
145     $prev->delete;
146     } else {
147     $ws->{content} = ' ';
148     }
149     }
150     }
151    
152     # prune whitespace around blocks, also ";" at end of blocks
153     if ($self->{optimise_size}) {
154     # these usually decrease size, but decrease compressability more
155     for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::, PPI::Structure::List::) {
156     for my $node (@{ $doc->find ($struct) }) {
157     my $n1 = $node->first_token;
158 root 1.2 # my $n2 = $n1->previous_token;
159 root 1.1 my $n3 = $n1->next_token;
160     $n1->delete if $n1->isa (PPI::Token::Whitespace::);
161 root 1.2 # $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::); # unsafe! AE::timer $MAX_SIGNAL_LATENCY -($NOW - int$NOW)
162 root 1.1 $n3->delete if $n3 && $n3->isa (PPI::Token::Whitespace::);
163     my $n1 = $node->last_token;
164     my $n2 = $n1->next_token;
165     my $n3 = $n1->previous_token;
166     $n1->delete if $n1->isa (PPI::Token::Whitespace::);
167     $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
168 root 1.2 $n3->{content} = "" # delete seems to trigger a bug inside PPI
169     if $n3 && ($n3->isa (PPI::Token::Whitespace::)
170 root 1.1 || ($n3->isa (PPI::Token::Structure::) && $n3->content eq ";"));
171     }
172     }
173     }
174    
175     # foreach => for
176     for my $node (@{ $doc->find (PPI::Statement::Compound::) }) {
177     if (my $n = $node->first_token) {
178     $n->{content} = "for" if $n->{content} eq "foreach" && $n->isa (PPI::Token::Word::);
179     }
180     }
181    
182     # reformat qw() lists which often have lots of whitespace
183     for my $node (@{ $doc->find (PPI::Token::QuoteLike::Words::) }) {
184     if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
185     my ($a, $qw, $b) = ($1, $2, $3);
186     $qw =~ s/^\s+//;
187     $qw =~ s/\s+$//;
188     $qw =~ s/\s+/ /g;
189     $node->{content} = "qw$a$qw$b";
190     }
191     }
192    
193     # prune return at end of sub-blocks
194     #TODO:
195     # PPI::Document
196     # PPI::Statement::Sub
197     # PPI::Token::Word 'sub'
198     # PPI::Token::Whitespace ' '
199     # PPI::Token::Word 'f'
200     # PPI::Structure::Block { ... }
201     # PPI::Statement
202     # PPI::Token::Word 'sub'
203     # PPI::Structure::Block { ... }
204     # PPI::Statement::Break
205     # PPI::Token::Word 'return'
206     # PPI::Token::Whitespace ' '
207     # PPI::Token::Number '5'
208     # PPI::Token::Structure ';'
209     # PPI::Statement::Compound
210     # PPI::Structure::Block { ... }
211     # PPI::Statement::Break
212     # PPI::Token::Word 'return'
213     # PPI::Token::Whitespace ' '
214     # PPI::Token::Number '8'
215     # PPI::Statement::Break
216     # PPI::Token::Word 'return'
217     # PPI::Token::Whitespace ' '
218     # PPI::Token::Number '7'
219    
220     1
221     }
222    
223     =item $perl = $transform->strip ($perl)
224    
225     Strips the perl source in C<$perl> and returns the stripped source.
226    
227     =cut
228    
229     sub strip {
230     my ($self, $src) = @_;
231    
232     my $filter = sub {
233     my $ppi = new PPI::Document \$src
234     or return;
235    
236     $self->document ($ppi)
237     or return;
238    
239     $src = $ppi->serialize;
240     };
241    
242     if (exists $self->{cache} && (2048 <= length $src)) {
243     my $file = "$self->{cache}/" . Digest::MD5::md5_hex "$CACHE_VERSION \n" . (!!$self->{optimise_size}) . "\n\x00$src";
244    
245     if (open my $fh, "<:perlio", $file) {
246     # zero size means unchanged
247     if (-s $fh) {
248     local $/;
249     $src = <$fh>
250     }
251     } else {
252     my $oldsrc = $src;
253    
254     $filter->();
255    
256     mkdir $self->{cache};
257    
258     if (open my $fh, ">:perlio", "$file~") {
259     # write a zero-byte file if source is unchanged
260     if ($oldsrc eq $src or (syswrite $fh, $src) == length $src) {
261     close $fh;
262     rename "$file~", $file;
263     }
264     }
265     }
266     } else {
267     $filter->();
268     }
269    
270     $src
271     }
272    
273     =back
274    
275     =head1 SEE ALSO
276    
277     L<App::Staticperl>, L<Perl::Squish>.
278    
279     =head1 AUTHOR
280    
281     Marc Lehmann <schmorp@schmorp.de>
282     http://home.schmorp.de/
283    
284     =cut
285    
286     1;
287