ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Perl-Strip/Strip.pm
Revision: 1.2
Committed: Sun Jan 9 10:08:29 2011 UTC (15 years, 1 month ago) by root
Branch: MAIN
CVS Tags: rel-1_0
Changes since 1.1: +19 -4 lines
Log Message:
1.0

File Contents

# Content
1 =head1 NAME
2
3 Perl::Strip - reduce file size by stripping whitespace, comments, pod etc.
4
5 =head1 SYNOPSIS
6
7 use Perl::Strip;
8
9 =head1 DESCRIPTION
10
11 This module transforms perl sources into a more compact format. It does
12 this by removing most whitespace, comments, pod, and by some other means.
13
14 The resulting code looks obfuscated, but perl (and the deparser) don't
15 have any problems with that. Depending on the source file you can expect
16 about 30-60% "compression".
17
18 The main target for this module is low-diskspace environments, such as
19 L<App::Staticperl>, boot floppy/CDs/flash environments and so on.
20
21 See also the commandline utility L<perlstrip>.
22
23 =head1 METHODS
24
25 The C<Perl::Strip> class is a subclsass of L<PPI::Transform>, and as such
26 inherits all of it's methods, even the ones not documented here.
27
28 =over 4
29
30 =cut
31
32 package Perl::Strip;
33
34 our $VERSION = '1.0';
35 our $CACHE_VERSION = 1;
36
37 use common::sense;
38
39 use PPI;
40
41 use base PPI::Transform::;
42
43 =item my $transform = new Perl::Strip key => value...
44
45 Creates a new Perl::Strip transform object. It supports the following
46 parameters:
47
48 =over 4
49
50 =item optimise_size => $bool
51
52 By default, this module optimises I<compressability>, not raw size. This
53 switch changes that (and makes it slower).
54
55 =item cache => $path
56
57 Since this module can take a very long time (minutes for the larger files
58 in the perl distribution), it can utilise a cache directory. The directory
59 will be created if it doesn't exist, and can be deleted at any time.
60
61 =back
62
63 =cut
64
65 # PPI::Transform compatible
66 sub document {
67 my ($self, $doc) = @_;
68
69 $self->{optimise_size} = 1; # more research is needed
70
71 # special stripping for unicore/ files
72 if (eval { $doc->child (1)->content =~ /^# .* machine-generated .*mktables / }) {
73
74 for my $heredoc (@{ $doc->find (PPI::Token::HereDoc::) }) {
75 my $src = join "", $heredoc->heredoc;
76
77 # special stripping for unicore swashes and properties
78 # much more could be done by going binary
79 for ($src) {
80 s/^([0-9a-fA-F]+)\t([0-9a-fA-F]+)\t/sprintf "%X\t%X", hex $1, hex $2/gem
81 if $self->{optimise_size};
82
83 # s{
84 # ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
85 # }{
86 # # ww - smaller filesize, UU - compress better
87 # pack "C0UU",
88 # hex $1,
89 # length $2 ? (hex $2) - (hex $1) : 0
90 # }gemx;
91
92 s/#.*\n/\n/mg;
93 s/\s+\n/\n/mg;
94 }
95
96 # PPI seems to be mostly undocumented
97 $heredoc->{_heredoc} = [split /$/, $src];
98 }
99 }
100
101 $doc->prune (PPI::Token::Comment::);
102 $doc->prune (PPI::Token::Pod::);
103
104 # prune END stuff
105 for (my $last = $doc->last_element; $last; ) {
106 my $prev = $last->previous_token;
107
108 if ($last->isa (PPI::Token::Whitespace::)) {
109 $last->delete;
110 } elsif ($last->isa (PPI::Statement::End::)) {
111 $last->delete;
112 last;
113 } elsif ($last->isa (PPI::Token::Pod::)) {
114 $last->delete;
115 } else {
116 last;
117 }
118
119 $last = $prev;
120 }
121
122 # prune some but not all insignificant whitespace
123 for my $ws (@{ $doc->find (PPI::Token::Whitespace::) }) {
124 my $prev = $ws->previous_token;
125 my $next = $ws->next_token;
126
127 if (!$prev || !$next) {
128 $ws->delete;
129 } else {
130 if (
131 $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float
132 or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/
133 or $prev->isa (PPI::Token::Structure::)
134 or ($self->{optimise_size} &&
135 ($prev->isa (PPI::Token::Word::)
136 && (PPI::Token::Symbol:: eq ref $next
137 || $next->isa (PPI::Structure::Block::)
138 || $next->isa (PPI::Structure::List::)
139 || $next->isa (PPI::Structure::Condition::)))
140 )
141 ) {
142 $ws->delete;
143 } elsif ($prev->isa (PPI::Token::Whitespace::)) {
144 $ws->{content} = ' ';
145 $prev->delete;
146 } else {
147 $ws->{content} = ' ';
148 }
149 }
150 }
151
152 # prune whitespace around blocks, also ";" at end of blocks
153 if ($self->{optimise_size}) {
154 # these usually decrease size, but decrease compressability more
155 for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::, PPI::Structure::List::) {
156 for my $node (@{ $doc->find ($struct) }) {
157 my $n1 = $node->first_token;
158 # my $n2 = $n1->previous_token;
159 my $n3 = $n1->next_token;
160 $n1->delete if $n1->isa (PPI::Token::Whitespace::);
161 # $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::); # unsafe! AE::timer $MAX_SIGNAL_LATENCY -($NOW - int$NOW)
162 $n3->delete if $n3 && $n3->isa (PPI::Token::Whitespace::);
163 my $n1 = $node->last_token;
164 my $n2 = $n1->next_token;
165 my $n3 = $n1->previous_token;
166 $n1->delete if $n1->isa (PPI::Token::Whitespace::);
167 $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
168 $n3->{content} = "" # delete seems to trigger a bug inside PPI
169 if $n3 && ($n3->isa (PPI::Token::Whitespace::)
170 || ($n3->isa (PPI::Token::Structure::) && $n3->content eq ";"));
171 }
172 }
173 }
174
175 # foreach => for
176 for my $node (@{ $doc->find (PPI::Statement::Compound::) }) {
177 if (my $n = $node->first_token) {
178 $n->{content} = "for" if $n->{content} eq "foreach" && $n->isa (PPI::Token::Word::);
179 }
180 }
181
182 # reformat qw() lists which often have lots of whitespace
183 for my $node (@{ $doc->find (PPI::Token::QuoteLike::Words::) }) {
184 if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
185 my ($a, $qw, $b) = ($1, $2, $3);
186 $qw =~ s/^\s+//;
187 $qw =~ s/\s+$//;
188 $qw =~ s/\s+/ /g;
189 $node->{content} = "qw$a$qw$b";
190 }
191 }
192
193 # prune return at end of sub-blocks
194 #TODO:
195 # PPI::Document
196 # PPI::Statement::Sub
197 # PPI::Token::Word 'sub'
198 # PPI::Token::Whitespace ' '
199 # PPI::Token::Word 'f'
200 # PPI::Structure::Block { ... }
201 # PPI::Statement
202 # PPI::Token::Word 'sub'
203 # PPI::Structure::Block { ... }
204 # PPI::Statement::Break
205 # PPI::Token::Word 'return'
206 # PPI::Token::Whitespace ' '
207 # PPI::Token::Number '5'
208 # PPI::Token::Structure ';'
209 # PPI::Statement::Compound
210 # PPI::Structure::Block { ... }
211 # PPI::Statement::Break
212 # PPI::Token::Word 'return'
213 # PPI::Token::Whitespace ' '
214 # PPI::Token::Number '8'
215 # PPI::Statement::Break
216 # PPI::Token::Word 'return'
217 # PPI::Token::Whitespace ' '
218 # PPI::Token::Number '7'
219
220 1
221 }
222
223 =item $perl = $transform->strip ($perl)
224
225 Strips the perl source in C<$perl> and returns the stripped source.
226
227 =cut
228
229 sub strip {
230 my ($self, $src) = @_;
231
232 my $filter = sub {
233 my $ppi = new PPI::Document \$src
234 or return;
235
236 $self->document ($ppi)
237 or return;
238
239 $src = $ppi->serialize;
240 };
241
242 if (exists $self->{cache} && (2048 <= length $src)) {
243 my $file = "$self->{cache}/" . Digest::MD5::md5_hex "$CACHE_VERSION \n" . (!!$self->{optimise_size}) . "\n\x00$src";
244
245 if (open my $fh, "<:perlio", $file) {
246 # zero size means unchanged
247 if (-s $fh) {
248 local $/;
249 $src = <$fh>
250 }
251 } else {
252 my $oldsrc = $src;
253
254 $filter->();
255
256 mkdir $self->{cache};
257
258 if (open my $fh, ">:perlio", "$file~") {
259 # write a zero-byte file if source is unchanged
260 if ($oldsrc eq $src or (syswrite $fh, $src) == length $src) {
261 close $fh;
262 rename "$file~", $file;
263 }
264 }
265 }
266 } else {
267 $filter->();
268 }
269
270 $src
271 }
272
273 =back
274
275 =head1 SEE ALSO
276
277 L<App::Staticperl>, L<Perl::Squish>.
278
279 =head1 AUTHOR
280
281 Marc Lehmann <schmorp@schmorp.de>
282 http://home.schmorp.de/
283
284 =cut
285
286 1;
287