ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Perl-Strip/Strip.pm
Revision: 1.1
Committed: Sat Jan 8 05:40:06 2011 UTC (13 years, 4 months ago) by root
Branch: MAIN
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     Perl::Strip - reduce file size by stripping whitespace, comments, pod etc.
4    
5     =head1 SYNOPSIS
6    
7     use Perl::Strip;
8    
9     =head1 DESCRIPTION
10    
11     =head1 METHODS
12    
13     The C<Perl::Strip> class is a subclsass of L<PPI::Transform>, and as such
14     inherits all of it's methods, even the ones not documented here.
15    
16     =over 4
17    
18     =cut
19    
20     package Perl::Strip;
21    
22     our $VERSION = '0.1';
23     our $CACHE_VERSION = 1;
24    
25     use common::sense;
26    
27     use PPI;
28    
29     use base PPI::Transform::;
30    
31     =item my $transform = new Perl::Strip key => value...
32    
33     Creates a new Perl::Strip transform object. It supports the following
34     parameters:
35    
36     =over 4
37    
38     =item optimise_size => $bool
39    
40     By default, this module optimises I<compressability>, not raw size. This
41     switch changes that (and makes it slower).
42    
43     =item cache => $path
44    
45     Since this module can take a very long time (minutes for the larger files
46     in the perl distribution), it can utilise a cache directory. The directory
47     will be created if it doesn't exist, and can be deleted at any time.
48    
49     =back
50    
51     =cut
52    
53     # PPI::Transform compatible
54     sub document {
55     my ($self, $doc) = @_;
56    
57     # special stripping for unicore/ files
58     if (eval { $doc->child (1)->content =~ /^# .* machine-generated .*mktables / }) {
59    
60     for my $heredoc (@{ $doc->find (PPI::Token::HereDoc::) }) {
61     my $src = join "", $heredoc->heredoc;
62    
63     # special stripping for unicore swashes and properties
64     # much more could be done by going binary
65     for ($src) {
66     s/^([0-9a-fA-F]+)\t([0-9a-fA-F]+)\t/sprintf "%X\t%X", hex $1, hex $2/gem
67     if $self->{optimise_size};
68    
69     # s{
70     # ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
71     # }{
72     # # ww - smaller filesize, UU - compress better
73     # pack "C0UU",
74     # hex $1,
75     # length $2 ? (hex $2) - (hex $1) : 0
76     # }gemx;
77    
78     s/#.*\n/\n/mg;
79     s/\s+\n/\n/mg;
80     }
81    
82     # PPI seems to be mostly undocumented
83     $heredoc->{_heredoc} = [split /$/, $src];
84     }
85     }
86    
87     $doc->prune (PPI::Token::Comment::);
88     $doc->prune (PPI::Token::Pod::);
89    
90     # prune END stuff
91     for (my $last = $doc->last_element; $last; ) {
92     my $prev = $last->previous_token;
93    
94     if ($last->isa (PPI::Token::Whitespace::)) {
95     $last->delete;
96     } elsif ($last->isa (PPI::Statement::End::)) {
97     $last->delete;
98     last;
99     } elsif ($last->isa (PPI::Token::Pod::)) {
100     $last->delete;
101     } else {
102     last;
103     }
104    
105     $last = $prev;
106     }
107    
108     # prune some but not all insignificant whitespace
109     for my $ws (@{ $doc->find (PPI::Token::Whitespace::) }) {
110     my $prev = $ws->previous_token;
111     my $next = $ws->next_token;
112    
113     if (!$prev || !$next) {
114     $ws->delete;
115     } else {
116     if (
117     $next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,|=|!|!=|==|=>)$/ # no ., because of digits. == float
118     or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,|=|\.|!|!=|==|=>)$/
119     or $prev->isa (PPI::Token::Structure::)
120     or ($self->{optimise_size} &&
121     ($prev->isa (PPI::Token::Word::)
122     && (PPI::Token::Symbol:: eq ref $next
123     || $next->isa (PPI::Structure::Block::)
124     || $next->isa (PPI::Structure::List::)
125     || $next->isa (PPI::Structure::Condition::)))
126     )
127     ) {
128     $ws->delete;
129     } elsif ($prev->isa (PPI::Token::Whitespace::)) {
130     $ws->{content} = ' ';
131     $prev->delete;
132     } else {
133     $ws->{content} = ' ';
134     }
135     }
136     }
137    
138     # prune whitespace around blocks, also ";" at end of blocks
139     if ($self->{optimise_size}) {
140     # these usually decrease size, but decrease compressability more
141     for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::, PPI::Structure::List::) {
142     for my $node (@{ $doc->find ($struct) }) {
143     my $n1 = $node->first_token;
144     my $n2 = $n1->previous_token;
145     my $n3 = $n1->next_token;
146     $n1->delete if $n1->isa (PPI::Token::Whitespace::);
147     $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
148     $n3->delete if $n3 && $n3->isa (PPI::Token::Whitespace::);
149     my $n1 = $node->last_token;
150     my $n2 = $n1->next_token;
151     my $n3 = $n1->previous_token;
152     $n1->delete if $n1->isa (PPI::Token::Whitespace::);
153     $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
154     $n3->delete if $n3 && ($n3->isa (PPI::Token::Whitespace::)
155     || ($n3->isa (PPI::Token::Structure::) && $n3->content eq ";"));
156     }
157     }
158     }
159    
160     # foreach => for
161     for my $node (@{ $doc->find (PPI::Statement::Compound::) }) {
162     if (my $n = $node->first_token) {
163     $n->{content} = "for" if $n->{content} eq "foreach" && $n->isa (PPI::Token::Word::);
164     }
165     }
166    
167     # reformat qw() lists which often have lots of whitespace
168     for my $node (@{ $doc->find (PPI::Token::QuoteLike::Words::) }) {
169     if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
170     my ($a, $qw, $b) = ($1, $2, $3);
171     $qw =~ s/^\s+//;
172     $qw =~ s/\s+$//;
173     $qw =~ s/\s+/ /g;
174     $node->{content} = "qw$a$qw$b";
175     }
176     }
177    
178     # prune return at end of sub-blocks
179     #TODO:
180     # PPI::Document
181     # PPI::Statement::Sub
182     # PPI::Token::Word 'sub'
183     # PPI::Token::Whitespace ' '
184     # PPI::Token::Word 'f'
185     # PPI::Structure::Block { ... }
186     # PPI::Statement
187     # PPI::Token::Word 'sub'
188     # PPI::Structure::Block { ... }
189     # PPI::Statement::Break
190     # PPI::Token::Word 'return'
191     # PPI::Token::Whitespace ' '
192     # PPI::Token::Number '5'
193     # PPI::Token::Structure ';'
194     # PPI::Statement::Compound
195     # PPI::Structure::Block { ... }
196     # PPI::Statement::Break
197     # PPI::Token::Word 'return'
198     # PPI::Token::Whitespace ' '
199     # PPI::Token::Number '8'
200     # PPI::Statement::Break
201     # PPI::Token::Word 'return'
202     # PPI::Token::Whitespace ' '
203     # PPI::Token::Number '7'
204    
205     1
206     }
207    
208     =item $perl = $transform->strip ($perl)
209    
210     Strips the perl source in C<$perl> and returns the stripped source.
211    
212     =cut
213    
214     sub strip {
215     my ($self, $src) = @_;
216    
217     my $filter = sub {
218     my $ppi = new PPI::Document \$src
219     or return;
220    
221     $self->document ($ppi)
222     or return;
223    
224     $src = $ppi->serialize;
225     };
226    
227     if (exists $self->{cache} && (2048 <= length $src)) {
228     my $file = "$self->{cache}/" . Digest::MD5::md5_hex "$CACHE_VERSION \n" . (!!$self->{optimise_size}) . "\n\x00$src";
229    
230     if (open my $fh, "<:perlio", $file) {
231     # zero size means unchanged
232     if (-s $fh) {
233     local $/;
234     $src = <$fh>
235     }
236     } else {
237     my $oldsrc = $src;
238    
239     $filter->();
240    
241     mkdir $self->{cache};
242    
243     if (open my $fh, ">:perlio", "$file~") {
244     # write a zero-byte file if source is unchanged
245     if ($oldsrc eq $src or (syswrite $fh, $src) == length $src) {
246     close $fh;
247     rename "$file~", $file;
248     }
249     }
250     }
251     } else {
252     $filter->();
253     }
254    
255     $src
256     }
257    
258     =back
259    
260     =head1 SEE ALSO
261    
262     L<App::Staticperl>, L<Perl::Squish>.
263    
264     =head1 AUTHOR
265    
266     Marc Lehmann <schmorp@schmorp.de>
267     http://home.schmorp.de/
268    
269     =cut
270    
271     1;
272