1 |
/*********************************************************************** |
2 |
** |
3 |
** lzv.c -- an extremly fast compression/decompression-method |
4 |
** |
5 |
** Written by Hermann Vogt |
6 |
** |
7 |
** v 0.5 -- 00/04/10 fix unaligned access (Marc) |
8 |
** v 0.4 -- 00/03/25 adapted for PApp by Marc Lehmann <pcg@goof.com> |
9 |
** v 0.3 -- 94/03/08 aCembler version of rLZV built in. |
10 |
** v 0.2 -- 94/03/04 Changes for usage with DouBle 0.2 built in. |
11 |
** v 0.1 -- 94/03/01 Intensivly tested, removed all known bugs. |
12 |
** v 0.0 -- 94/02/21 First Version. |
13 |
** |
14 |
** Copyright (c) 1994 Hermann Vogt. Redistribution of this file is |
15 |
** permitted under the GNU Public Licence. |
16 |
** |
17 |
** The method presented here is faster and compresses better |
18 |
** than lzrw1 and lzrw1-a. I named it lzv for "Lev-Zimpel-Vogt". |
19 |
** It uses ideas introduced by Ross Williams in his algorithm lzrw1 |
20 |
** [R. N. Williams (1991): "An Extremly Fast ZIV-Lempel Data |
21 |
** Compression Algorithm", Proceedings IEEE Data Compression |
22 |
** Conference, Snowbird, Utah, 362-371] and by Fiala and Green in their |
23 |
** algorithm a1 [E. R. Fiala, D. H. Greene (1989): "Data Compression |
24 |
** with Finite Windows", Communications of the ACM, 4, 490-505]. |
25 |
** Because lzv differs strongly from both, I hope there will be no |
26 |
** patent problems. The hashing-method has been stolen from Jean-loup |
27 |
** Gailly's (patent free) gzip. |
28 |
** |
29 |
** KNOWN PROBLEMS: |
30 |
** - My english is very bad. |
31 |
** - Badly commented. (I hope this will be better in the next |
32 |
** version.) |
33 |
** - I'm not sure if lzv is free from patent problems. |
34 |
** |
35 |
***********************************************************************/ |
36 |
|
37 |
#define HSIZE 0x4000 |
38 |
#define HMASK 0x3fff |
39 |
#define HSHIFT 5 |
40 |
|
41 |
#define MLL 32 /* Maximum len of chain of literals */ |
42 |
#define MML (8+256) /* Maximum len of match */ |
43 |
#define MOFF 8191 /* Maximum offset */ |
44 |
#define HSIZ 16384 /* Size of Hashtable */ |
45 |
|
46 |
/* ugly type names */ |
47 |
|
48 |
typedef unsigned char uch; |
49 |
typedef unsigned short ush; |
50 |
typedef unsigned int uit; |
51 |
|
52 |
#undef ONLY_64K /* 64k-max encoder is faster */ |
53 |
/* but only veeeery slightly */ |
54 |
|
55 |
/* unconditionally aligning does not cost much much, so do it if unsure */ |
56 |
#define align_ushort !defined(__i386) |
57 |
|
58 |
static int |
59 |
wLZV1 (uch * in, uch * out, ush * heap, int len, int out_len) |
60 |
{ |
61 |
uit hval, op, ip, l_len, m_pos, m_off, m_len, maxlen; |
62 |
ush *lzv1_htab = heap; |
63 |
|
64 |
maxlen = out_len; |
65 |
hval = ((in[0] << 5) ^ in[1]) & (HSIZ - 1); |
66 |
ip = op = l_len = 0; |
67 |
|
68 |
do |
69 |
{ |
70 |
hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1); |
71 |
m_pos = lzv1_htab[hval]; |
72 |
lzv1_htab[hval] = ip; |
73 |
|
74 |
#ifndef ONLY_64K |
75 |
/* |
76 |
* If you want to compress more than 64K, uncomment |
77 |
* the following lines. |
78 |
*/ |
79 |
|
80 |
m_pos = (ip & ~0xffff) + m_pos; |
81 |
if (m_pos >= ip && m_pos >= 0x10000) |
82 |
m_pos -= 0x10000; |
83 |
#endif |
84 |
|
85 |
if (m_pos < ip |
86 |
&& in[m_pos ] == in[ip ] |
87 |
&& (m_off = ip - m_pos - 1) <= MOFF |
88 |
&& ip + 4 < len |
89 |
#if align_ushort |
90 |
&& in[m_pos + 1] == in[ip + 1] |
91 |
&& in[m_pos + 2] == in[ip + 2] |
92 |
#else |
93 |
&& *(ush *) (in + m_pos + 1) == *(ush *) (in + ip + 1) |
94 |
#endif |
95 |
) |
96 |
{ |
97 |
/* We have found a match */ |
98 |
uit look = len - ip - 2; |
99 |
if (look > MML) |
100 |
look = MML; |
101 |
m_len = 2; |
102 |
|
103 |
do |
104 |
{ |
105 |
m_len++; |
106 |
} |
107 |
while (m_len != look && in[ip + m_len] == in[m_pos + m_len]); |
108 |
|
109 |
if (op + 2 + l_len + 3 >= maxlen) |
110 |
return 0; |
111 |
|
112 |
if (l_len != 0) |
113 |
{ |
114 |
out[op++] = (l_len - 1) << 3; |
115 |
do |
116 |
{ |
117 |
out[op++] = in[ip - l_len--]; |
118 |
} |
119 |
while (l_len != 0); |
120 |
} |
121 |
|
122 |
m_len -= 2; |
123 |
|
124 |
if (m_len <= 6) |
125 |
{ |
126 |
out[op++] = m_len | ((m_off >> 5) & 0xf8); |
127 |
} |
128 |
else |
129 |
{ |
130 |
out[op++] = 0x07 | ((m_off >> 5) & 0xf8); |
131 |
out[op++] = m_len - 7; |
132 |
} |
133 |
|
134 |
out[op++] = m_off & 0xff; |
135 |
ip++; |
136 |
hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1); |
137 |
lzv1_htab[hval] = ip; |
138 |
ip++; |
139 |
|
140 |
do |
141 |
{ |
142 |
hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1); |
143 |
lzv1_htab[hval] = ip; |
144 |
ip++; |
145 |
m_len--; |
146 |
} |
147 |
while (0 != m_len); |
148 |
} |
149 |
else |
150 |
{ |
151 |
/* No match found */ |
152 |
ip++; |
153 |
l_len++; |
154 |
|
155 |
if (MLL == l_len) |
156 |
{ |
157 |
if (op + 2 + MLL >= maxlen) |
158 |
return 0; |
159 |
|
160 |
out[op++] = 0xf8; |
161 |
|
162 |
do |
163 |
{ |
164 |
out[op++] = in[ip - l_len--]; |
165 |
} |
166 |
while (l_len != 0); |
167 |
} |
168 |
} |
169 |
} |
170 |
while (ip < len); |
171 |
|
172 |
if (l_len != 0) |
173 |
{ |
174 |
if (op + l_len + 3 >= maxlen) |
175 |
return 0; |
176 |
|
177 |
out[op++] = (l_len - 1) << 3; |
178 |
|
179 |
do |
180 |
{ |
181 |
out[op++] = in[ip - l_len--]; |
182 |
} |
183 |
while (l_len != 0); |
184 |
} |
185 |
return op; |
186 |
} |
187 |
|
188 |
static int |
189 |
rLZV1 (uch const *const in, uch * const out, int ilen, int len) |
190 |
{ |
191 |
register uit tbuf, c_len; |
192 |
uch *const out_end = out + len; |
193 |
register uch *op = out; |
194 |
uch const *const in_end = in + ilen; |
195 |
register uch const *ip = in; |
196 |
|
197 |
do |
198 |
{ |
199 |
tbuf = *ip++; |
200 |
c_len = tbuf & 0x07; |
201 |
|
202 |
if (0 == c_len) |
203 |
{ |
204 |
c_len = (tbuf >> 3) + 1; |
205 |
|
206 |
/*if (op + c_len > out_end) /* too many checks... */ |
207 |
/* return 0;*/ |
208 |
|
209 |
do |
210 |
*op++ = *ip++; |
211 |
while (--c_len); /* effic: memcpy()? */ |
212 |
} |
213 |
else |
214 |
{ |
215 |
register uch *m_pos; |
216 |
|
217 |
if (0x07 == c_len) |
218 |
c_len = *ip++ + 7; |
219 |
m_pos = op - 1 - (((uit) (tbuf & 0xf8) << 5) | *ip++); |
220 |
|
221 |
/* If we don't check this then we segfault (if in user |
222 |
space) or leave process in uninteruptible state (if |
223 |
in kernel) if the data is corrupt. */ |
224 |
if (m_pos < out) |
225 |
return 0; /* Compression error. */ |
226 |
|
227 |
/*if (op + c_len + 2 > out_end) /* too many checks */ |
228 |
/* return 0;*/ |
229 |
|
230 |
*op++ = *m_pos++; |
231 |
*op++ = *m_pos++; |
232 |
|
233 |
do |
234 |
*op++ = *m_pos++; |
235 |
while (--c_len); |
236 |
} |
237 |
} |
238 |
while (op < out_end && ip < in_end); |
239 |
|
240 |
return op - out; |
241 |
} |