ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Perl-Strip/Strip.pm
Revision: 1.1
Committed: Sat Jan 8 05:40:06 2011 UTC (15 years, 5 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 Perl::Strip - reduce file size by stripping whitespace, comments, pod etc.
4
5 =head1 SYNOPSIS
6
7 use Perl::Strip;
8
9 =head1 DESCRIPTION
10
11 =head1 METHODS
12
13 The C<Perl::Strip> class is a subclsass of L<PPI::Transform>, and as such
14 inherits all of it's methods, even the ones not documented here.
15
16 =over 4
17
18 =cut
19
20 package Perl::Strip;
21
22 our $VERSION = '0.1';
23 our $CACHE_VERSION = 1;
24
25 use common::sense;
26
27 use PPI;
28
29 use base PPI::Transform::;
30
31 =item my $transform = new Perl::Strip key => value...
32
33 Creates a new Perl::Strip transform object. It supports the following
34 parameters:
35
36 =over 4
37
38 =item optimise_size => $bool
39
40 By default, this module optimises I<compressability>, not raw size. This
41 switch changes that (and makes it slower).
42
43 =item cache => $path
44
45 Since this module can take a very long time (minutes for the larger files
46 in the perl distribution), it can utilise a cache directory. The directory
47 will be created if it doesn't exist, and can be deleted at any time.
48
49 =back
50
51 =cut
52
53 # PPI::Transform compatible
54 sub document {
55 my ($self, $doc) = @_;
56
57 # special stripping for unicore/ files
58 if (eval { $doc->child (1)->content =~ /^# .* machine-generated .*mktables / }) {
59
60 for my $heredoc (@{ $doc->find (PPI::Token::HereDoc::) }) {
61 my $src = join "", $heredoc->heredoc;
62
63 # special stripping for unicore swashes and properties
64 # much more could be done by going binary
65 for ($src) {
66 s/^([0-9a-fA-F]+)\t([0-9a-fA-F]+)\t/sprintf "%X\t%X", hex $1, hex $2/gem
67 if $self->{optimise_size};
68
69 # s{
70 # ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
71 # }{
72 # # ww - smaller filesize, UU - compress better
73 # pack "C0UU",
74 # hex $1,
75 # length $2 ? (hex $2) - (hex $1) : 0
76 # }gemx;
77
78 s/#.*\n/\n/mg;
79 s/\s+\n/\n/mg;
80 }
81
82 # PPI seems to be mostly undocumented
83 $heredoc->{_heredoc} = [split /$/, $src];
84 }
85 }
86
87 $doc->prune (PPI::Token::Comment::);
88 $doc->prune (PPI::Token::Pod::);
89
90 # prune END stuff
91 for (my $last = $doc->last_element; $last; ) {
92 my $prev = $last->previous_token;
93
94 if ($last->isa (PPI::Token::Whitespace::)) {
95 $last->delete;
96 } elsif ($last->isa (PPI::Statement::End::)) {
97 $last->delete;
98 last;
99 } elsif ($last->isa (PPI::Token::Pod::)) {
100 $last->delete;
101 } else {
102 last;
103 }
104
105 $last = $prev;
106 }
107
108 # prune some but not all insignificant whitespace
109 for my $ws (@{ $doc->find (PPI::Token::Whitespace::) }) {
110 my $prev = $ws->previous_token;
111 my $next = $ws->next_token;
112
113 if (!$prev || !$next) {
114 $ws->delete;
115 } else {
116 if (
117 $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float
118 or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/
119 or $prev->isa (PPI::Token::Structure::)
120 or ($self->{optimise_size} &&
121 ($prev->isa (PPI::Token::Word::)
122 && (PPI::Token::Symbol:: eq ref $next
123 || $next->isa (PPI::Structure::Block::)
124 || $next->isa (PPI::Structure::List::)
125 || $next->isa (PPI::Structure::Condition::)))
126 )
127 ) {
128 $ws->delete;
129 } elsif ($prev->isa (PPI::Token::Whitespace::)) {
130 $ws->{content} = ' ';
131 $prev->delete;
132 } else {
133 $ws->{content} = ' ';
134 }
135 }
136 }
137
138 # prune whitespace around blocks, also ";" at end of blocks
139 if ($self->{optimise_size}) {
140 # these usually decrease size, but decrease compressability more
141 for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::, PPI::Structure::List::) {
142 for my $node (@{ $doc->find ($struct) }) {
143 my $n1 = $node->first_token;
144 my $n2 = $n1->previous_token;
145 my $n3 = $n1->next_token;
146 $n1->delete if $n1->isa (PPI::Token::Whitespace::);
147 $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
148 $n3->delete if $n3 && $n3->isa (PPI::Token::Whitespace::);
149 my $n1 = $node->last_token;
150 my $n2 = $n1->next_token;
151 my $n3 = $n1->previous_token;
152 $n1->delete if $n1->isa (PPI::Token::Whitespace::);
153 $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
154 $n3->delete if $n3 && ($n3->isa (PPI::Token::Whitespace::)
155 || ($n3->isa (PPI::Token::Structure::) && $n3->content eq ";"));
156 }
157 }
158 }
159
160 # foreach => for
161 for my $node (@{ $doc->find (PPI::Statement::Compound::) }) {
162 if (my $n = $node->first_token) {
163 $n->{content} = "for" if $n->{content} eq "foreach" && $n->isa (PPI::Token::Word::);
164 }
165 }
166
167 # reformat qw() lists which often have lots of whitespace
168 for my $node (@{ $doc->find (PPI::Token::QuoteLike::Words::) }) {
169 if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
170 my ($a, $qw, $b) = ($1, $2, $3);
171 $qw =~ s/^\s+//;
172 $qw =~ s/\s+$//;
173 $qw =~ s/\s+/ /g;
174 $node->{content} = "qw$a$qw$b";
175 }
176 }
177
178 # prune return at end of sub-blocks
179 #TODO:
180 # PPI::Document
181 # PPI::Statement::Sub
182 # PPI::Token::Word 'sub'
183 # PPI::Token::Whitespace ' '
184 # PPI::Token::Word 'f'
185 # PPI::Structure::Block { ... }
186 # PPI::Statement
187 # PPI::Token::Word 'sub'
188 # PPI::Structure::Block { ... }
189 # PPI::Statement::Break
190 # PPI::Token::Word 'return'
191 # PPI::Token::Whitespace ' '
192 # PPI::Token::Number '5'
193 # PPI::Token::Structure ';'
194 # PPI::Statement::Compound
195 # PPI::Structure::Block { ... }
196 # PPI::Statement::Break
197 # PPI::Token::Word 'return'
198 # PPI::Token::Whitespace ' '
199 # PPI::Token::Number '8'
200 # PPI::Statement::Break
201 # PPI::Token::Word 'return'
202 # PPI::Token::Whitespace ' '
203 # PPI::Token::Number '7'
204
205 1
206 }
207
208 =item $perl = $transform->strip ($perl)
209
210 Strips the perl source in C<$perl> and returns the stripped source.
211
212 =cut
213
214 sub strip {
215 my ($self, $src) = @_;
216
217 my $filter = sub {
218 my $ppi = new PPI::Document \$src
219 or return;
220
221 $self->document ($ppi)
222 or return;
223
224 $src = $ppi->serialize;
225 };
226
227 if (exists $self->{cache} && (2048 <= length $src)) {
228 my $file = "$self->{cache}/" . Digest::MD5::md5_hex "$CACHE_VERSION \n" . (!!$self->{optimise_size}) . "\n\x00$src";
229
230 if (open my $fh, "<:perlio", $file) {
231 # zero size means unchanged
232 if (-s $fh) {
233 local $/;
234 $src = <$fh>
235 }
236 } else {
237 my $oldsrc = $src;
238
239 $filter->();
240
241 mkdir $self->{cache};
242
243 if (open my $fh, ">:perlio", "$file~") {
244 # write a zero-byte file if source is unchanged
245 if ($oldsrc eq $src or (syswrite $fh, $src) == length $src) {
246 close $fh;
247 rename "$file~", $file;
248 }
249 }
250 }
251 } else {
252 $filter->();
253 }
254
255 $src
256 }
257
258 =back
259
260 =head1 SEE ALSO
261
262 L<App::Staticperl>, L<Perl::Squish>.
263
264 =head1 AUTHOR
265
266 Marc Lehmann <schmorp@schmorp.de>
267 http://home.schmorp.de/
268
269 =cut
270
271 1;
272