ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Perl-Strip/Strip.pm
Revision: 1.5
Committed: Thu Aug 3 03:06:08 2023 UTC (9 months, 2 weeks ago) by root
Branch: MAIN
CVS Tags: rel-1_2, HEAD
Changes since 1.4: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::Strip - reduce file size by stripping whitespace, comments, pod etc.
4
5 =head1 SYNOPSIS
6
7 use Perl::Strip;
8
9 =head1 DESCRIPTION
10
11 This module transforms perl sources into a more compact format. It does
12 this by removing most whitespace, comments, pod, and by some other means.
13
14 The resulting code looks obfuscated, but perl (and the deparser) don't
15 have any problems with that. Depending on the source file you can expect
16 about 30-60% "compression".
17
18 The main target for this module is low-diskspace environments, such as
19 L<App::Staticperl>, boot floppy/CDs/flash environments and so on.
20
21 See also the commandline utility L<perlstrip>.
22
23 =head1 METHODS
24
25 The C<Perl::Strip> class is a subclsass of L<PPI::Transform>, and as such
26 inherits all of it's methods, even the ones not documented here.
27
28 =over 4
29
30 =cut
31
32 package Perl::Strip;
33
34 our $VERSION = '1.2';
35 our $CACHE_VERSION = 3;
36
37 use common::sense;
38
39 use PPI;
40
41 use base PPI::Transform::;
42
43 =item my $transform = new Perl::Strip key => value...
44
45 Creates a new Perl::Strip transform object. It supports the following
46 parameters:
47
48 =over 4
49
50 =item optimise_size => $bool
51
52 By default, this module optimises I<compressability>, not raw size. This
53 switch changes that (and makes it slower).
54
55 =item keep_nl => $bool
56
57 By default, whitespace will either be stripped or replaced by a space. If
58 this option is enabled, then newlines will not be removed. This has the
59 advantage of keeping line number information intact (e.g. for backtraces),
60 but of course doesn't compress as well.
61
62 =item cache => $path
63
64 Since this module can take a very long time (minutes for the larger files
65 in the perl distribution), it can utilise a cache directory. The directory
66 will be created if it doesn't exist, and can be deleted at any time.
67
68 =back
69
70 =cut
71
72 # PPI::Transform compatible
73 sub document {
74 my ($self, $doc) = @_;
75
76 $self->{optimise_size} = 1; # more research is needed
77
78 # special stripping for unicore/ files
79 if (eval { $doc->child (1)->content =~ /^# .* (build by mktables|machine-generated .*mktables) / }) {
80
81 for my $heredoc (@{ $doc->find (PPI::Token::HereDoc::) }) {
82 my $src = join "", $heredoc->heredoc;
83
84 # special stripping for unicore swashes and properties
85 # much more could be done by going binary
86 for ($src) {
87 s/^(?:0*([0-9a-fA-F]+))?\t(?:0*([0-9a-fA-F]+))?\t(?:0*([0-9a-fA-F]+))?/$1\t$2\t$3/gm
88 if $self->{optimise_size};
89
90 # s{
91 # ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
92 # }{
93 # # ww - smaller filesize, UU - compress better
94 # pack "C0UU",
95 # hex $1,
96 # length $2 ? (hex $2) - (hex $1) : 0
97 # }gemx;
98
99 s/#.*\n/\n/mg;
100 s/\s+\n/\n/mg;
101 }
102
103 # PPI seems to be mostly undocumented
104 $heredoc->{_heredoc} = [split /$/, $src];
105 }
106 }
107
108 $doc->prune (PPI::Token::Comment::);
109 $doc->prune (PPI::Token::Pod::);
110
111 # prune END stuff
112 for (my $last = $doc->last_element; $last; ) {
113 my $prev = $last->previous_token;
114
115 if ($last->isa (PPI::Token::Whitespace::)) {
116 $last->delete;
117 } elsif ($last->isa (PPI::Statement::End::)) {
118 $last->delete;
119 last;
120 } elsif ($last->isa (PPI::Token::Pod::)) {
121 $last->delete;
122 } else {
123 last;
124 }
125
126 $last = $prev;
127 }
128
129 # prune some but not all insignificant whitespace
130 for my $ws (@{ $doc->find (PPI::Token::Whitespace::) }) {
131 my $prev = $ws->previous_token;
132 my $next = $ws->next_token;
133
134 if (!$prev || !$next) {
135 $ws->delete;
136 } else {
137 if ($next->isa (PPI::Token::Whitespace::)) {
138 $ws->delete;
139 } elsif (
140 $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float
141 or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/
142 or $prev->isa (PPI::Token::Structure::)
143 or ($self->{optimise_size} &&
144 ($prev->isa (PPI::Token::Word::)
145 && (PPI::Token::Symbol:: eq ref $next
146 || $next->isa (PPI::Structure::Block::)
147 || $next->isa (PPI::Structure::List::)
148 || $next->isa (PPI::Structure::Condition::)))
149 )
150 ) {
151 # perl has some idiotic warnings about nonexisting operators
152 if ($prev->isa (PPI::Token::Operator::) && $prev->{content} eq "="
153 && $next->isa (PPI::Token::Operator::) && $next->{content} =~ /[+\-]/
154 ) {
155 # avoid "Reverse %s operator" diagnostic
156 } else {
157 $ws->delete;
158 }
159 } else {
160 $ws->{content} = ' ';
161 }
162 }
163 }
164
165 # prune whitespace around blocks, also ";" at end of blocks
166 if ($self->{optimise_size}) {
167 # these usually decrease size, but decrease compressability more
168 for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::, PPI::Structure::List::) {
169 for my $node (@{ $doc->find ($struct) }) {
170 my $n1 = $node->first_token;
171 # my $n2 = $n1->previous_token;
172 my $n3 = $n1->next_token;
173 $n1->delete if $n1->isa (PPI::Token::Whitespace::);
174 # $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::); # unsafe! AE::timer $MAX_SIGNAL_LATENCY -($NOW - int$NOW)
175 $n3->delete if $n3 && $n3->isa (PPI::Token::Whitespace::);
176 my $n1 = $node->last_token;
177 my $n2 = $n1->next_token;
178 my $n3 = $n1->previous_token;
179 $n1->delete if $n1->isa (PPI::Token::Whitespace::);
180 $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
181 $n3->{content} = "" # delete seems to trigger a bug inside PPI
182 if $n3 && ($n3->isa (PPI::Token::Whitespace::)
183 || ($n3->isa (PPI::Token::Structure::) && $n3->content eq ";"));
184 }
185 }
186 }
187
188 # foreach => for
189 for my $node (@{ $doc->find (PPI::Statement::Compound::) }) {
190 if (my $n = $node->first_token) {
191 $n->{content} = "for" if $n->{content} eq "foreach" && $n->isa (PPI::Token::Word::);
192 }
193 }
194
195 # reformat qw() lists which often have lots of whitespace
196 for my $node (@{ $doc->find (PPI::Token::QuoteLike::Words::) }) {
197 if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
198 my ($a, $qw, $b) = ($1, $2, $3);
199 $qw =~ s/^\s+//;
200 $qw =~ s/\s+$//;
201 $qw =~ s/\s+/ /g;
202 $node->{content} = "qw$a$qw$b";
203 }
204 }
205
206 # prune return at end of sub-blocks
207 #TODO:
208 # PPI::Document
209 # PPI::Statement::Sub
210 # PPI::Token::Word 'sub'
211 # PPI::Token::Whitespace ' '
212 # PPI::Token::Word 'f'
213 # PPI::Structure::Block { ... }
214 # PPI::Statement
215 # PPI::Token::Word 'sub'
216 # PPI::Structure::Block { ... }
217 # PPI::Statement::Break
218 # PPI::Token::Word 'return'
219 # PPI::Token::Whitespace ' '
220 # PPI::Token::Number '5'
221 # PPI::Token::Structure ';'
222 # PPI::Statement::Compound
223 # PPI::Structure::Block { ... }
224 # PPI::Statement::Break
225 # PPI::Token::Word 'return'
226 # PPI::Token::Whitespace ' '
227 # PPI::Token::Number '8'
228 # PPI::Statement::Break
229 # PPI::Token::Word 'return'
230 # PPI::Token::Whitespace ' '
231 # PPI::Token::Number '7'
232
233 1
234 }
235
236 =item $perl = $transform->strip ($perl)
237
238 Strips the perl source in C<$perl> and returns the stripped source.
239
240 =cut
241
242 sub strip {
243 my ($self, $src) = @_;
244
245 my $filter = sub {
246 my $ppi = new PPI::Document \$src
247 or return;
248
249 $self->document ($ppi)
250 or return;
251
252 $src = $ppi->serialize;
253 };
254
255 if (exists $self->{cache} && (2048 <= length $src)) {
256 my $file = "$self->{cache}/" . Digest::MD5::md5_hex "$CACHE_VERSION \n" . (!!$self->{optimise_size}) . "\n\x00$src";
257
258 if (open my $fh, "<:perlio", $file) {
259 # zero size means unchanged
260 if (-s $fh) {
261 local $/;
262 $src = <$fh>
263 }
264 } else {
265 my $oldsrc = $src;
266
267 $filter->();
268
269 mkdir $self->{cache};
270
271 if (open my $fh, ">:perlio", "$file~") {
272 # write a zero-byte file if source is unchanged
273 if ($oldsrc eq $src or (syswrite $fh, $src) == length $src) {
274 close $fh;
275 rename "$file~", $file;
276 }
277 }
278 }
279 } else {
280 $filter->();
281 }
282
283 $src
284 }
285
286 =back
287
288 =head1 SEE ALSO
289
290 L<App::Staticperl>, L<Perl::Squish>.
291
292 =head1 AUTHOR
293
294 Marc Lehmann <schmorp@schmorp.de>
295 http://home.schmorp.de/
296
297 =cut
298
299 1;
300