/cvs/Compress-LZV1/lzv1/lzv1.c
Revision: 1.1
Committed: Thu Sep 27 18:35:29 2001 UTC by root
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Log Message:
*** empty log message ***

File Contents

/***********************************************************************
**
** lzv.c -- an extremely fast compression/decompression method
**
** Written by Hermann Vogt
**
** v 0.5 -- 00/04/10 fix unaligned access (Marc)
** v 0.4 -- 00/03/25 adapted for PApp by Marc Lehmann <pcg@goof.com>
** v 0.3 -- 94/03/08 Assembler version of rLZV built in.
** v 0.2 -- 94/03/04 Changes for usage with DouBle 0.2 built in.
** v 0.1 -- 94/03/01 Intensively tested, removed all known bugs.
** v 0.0 -- 94/02/21 First version.
**
** Copyright (c) 1994 Hermann Vogt. Redistribution of this file is
** permitted under the GNU Public Licence.
**
** The method presented here is faster and compresses better
** than lzrw1 and lzrw1-a. I named it lzv for "Lempel-Ziv-Vogt".
** It uses ideas introduced by Ross Williams in his algorithm lzrw1
** [R. N. Williams (1991): "An Extremely Fast Ziv-Lempel Data
** Compression Algorithm", Proceedings IEEE Data Compression
** Conference, Snowbird, Utah, 362-371] and by Fiala and Greene in
** their algorithm A1 [E. R. Fiala, D. H. Greene (1989): "Data
** Compression with Finite Windows", Communications of the ACM,
** 32(4), 490-505]. Because lzv differs strongly from both, I hope
** there will be no patent problems. The hashing method has been
** stolen from Jean-loup Gailly's (patent-free) gzip.
**
** KNOWN PROBLEMS:
** - My English is very bad.
** - Badly commented. (I hope this will be better in the next
**   version.)
** - I'm not sure if lzv is free from patent problems.
**
***********************************************************************/

#define HSIZE  0x4000
#define HMASK  0x3fff
#define HSHIFT 5

#define MLL  32         /* Maximum len of chain of literals */
#define MML  (8+256)    /* Maximum len of match             */
#define MOFF 8191       /* Maximum offset                   */
#define HSIZ 16384      /* Size of hash table               */

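/*
 * Compressed stream format, as inferred from wLZV1/rLZV1 below
 * (editorial summary, not part of the original description):
 *
 *   literal run   LLLLL000 <L+1 bytes>
 *                 copies L+1 (1..MLL) literal bytes to the output.
 *
 *   short match   OOOOOLLL oooooooo                    (LLL = 1..6)
 *                 copies LLL+2 bytes from OOOOOoooooooo+1 positions
 *                 back in the output (13-bit offset, at most MOFF).
 *
 *   long match    OOOOO111 LLLLLLLL oooooooo
 *                 copies LLLLLLLL+9 bytes (at most MML) from the same
 *                 13-bit offset.
 */
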
/* ugly type names */

typedef unsigned char  uch;
typedef unsigned short ush;
typedef unsigned int   uit;

#undef ONLY_64K         /* 64k-max encoder is faster  */
                        /* but only veeeery slightly  */

/* unconditionally aligning does not cost much, so do it if unsure */
#define align_ushort !defined(__i386)

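/*
 * wLZV1 -- compress len bytes from in[] into out[], which holds at most
 * out_len bytes; heap must point to a hash table of HSIZ ush entries
 * (editorial summary, inferred from the code below).  The table's
 * previous contents only influence which match candidates are tried,
 * never correctness, since every candidate is verified against the
 * input.  Returns the number of bytes written, or 0 if the result
 * would not fit into out_len.  Note that the hash update reads
 * in[ip + 2], i.e. up to two bytes past the end of the input buffer.
 */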
static int
wLZV1 (uch *in, uch *out, ush *heap, int len, int out_len)
{
  uit hval, op, ip, l_len, m_pos, m_off, m_len, maxlen;
  ush *lzv1_htab = heap;

  maxlen = out_len;
  hval = ((in[0] << 5) ^ in[1]) & (HSIZ - 1);
  ip = op = l_len = 0;

  do
    {
      hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
      m_pos = lzv1_htab[hval];
      lzv1_htab[hval] = ip;

#ifndef ONLY_64K
      /*
       * This adjustment is what allows inputs larger than 64K; define
       * ONLY_64K above to omit it and get the (very slightly) faster
       * 64K-only encoder.
       */
      m_pos = (ip & ~0xffff) + m_pos;
      if (m_pos >= ip && m_pos >= 0x10000)
        m_pos -= 0x10000;
#endif

      if (m_pos < ip
          && in[m_pos    ] == in[ip    ]
          && (m_off = ip - m_pos - 1) <= MOFF
          && ip + 4 < len
#if align_ushort
          && in[m_pos + 1] == in[ip + 1]
          && in[m_pos + 2] == in[ip + 2]
#else
          && *(ush *) (in + m_pos + 1) == *(ush *) (in + ip + 1)
#endif
         )
        {
          /* We have found a match */
          uit look = len - ip - 2;
          if (look > MML)
            look = MML;
          m_len = 2;

          do
            {
              m_len++;
            }
          while (m_len != look && in[ip + m_len] == in[m_pos + m_len]);

          if (op + 2 + l_len + 3 >= maxlen)
            return 0;

          if (l_len != 0)
            {
              out[op++] = (l_len - 1) << 3;
              do
                {
                  out[op++] = in[ip - l_len--];
                }
              while (l_len != 0);
            }

          m_len -= 2;

          if (m_len <= 6)
            {
              out[op++] = m_len | ((m_off >> 5) & 0xf8);
            }
          else
            {
              out[op++] = 0x07 | ((m_off >> 5) & 0xf8);
              out[op++] = m_len - 7;
            }

          out[op++] = m_off & 0xff;
          ip++;
          hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
          lzv1_htab[hval] = ip;
          ip++;

          do
            {
              hval = ((hval << 5) ^ in[ip + 2]) & (HSIZ - 1);
              lzv1_htab[hval] = ip;
              ip++;
              m_len--;
            }
          while (0 != m_len);
        }
      else
        {
          /* No match found */
          ip++;
          l_len++;

          if (MLL == l_len)
            {
              if (op + 2 + MLL >= maxlen)
                return 0;

              out[op++] = 0xf8;

              do
                {
                  out[op++] = in[ip - l_len--];
                }
              while (l_len != 0);
            }
        }
    }
  while (ip < len);

  if (l_len != 0)
    {
      if (op + l_len + 3 >= maxlen)
        return 0;

      out[op++] = (l_len - 1) << 3;

      do
        {
          out[op++] = in[ip - l_len--];
        }
      while (l_len != 0);
    }
  return op;
}

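/*
 * rLZV1 -- decompress ilen bytes from in[] into out[], which is
 * expected to receive len bytes (editorial summary, inferred from the
 * code below).  Returns the number of bytes produced, or 0 when a
 * match offset points before the start of the output, i.e. the input
 * is detectably corrupt.  Other malformed inputs are not caught, as
 * the remaining bounds checks are commented out for speed.
 */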
static int
rLZV1 (uch const *const in, uch *const out, int ilen, int len)
{
  register uit tbuf, c_len;
  uch *const out_end = out + len;
  register uch *op = out;
  uch const *const in_end = in + ilen;
  register uch const *ip = in;

  do
    {
      tbuf = *ip++;
      c_len = tbuf & 0x07;

      if (0 == c_len)
        {
          c_len = (tbuf >> 3) + 1;

          /* if (op + c_len > out_end)      too many checks...
               return 0; */

          do
            *op++ = *ip++;
          while (--c_len);              /* effic: memcpy()? */
        }
      else
        {
          register uch *m_pos;

          if (0x07 == c_len)
            c_len = *ip++ + 7;
          m_pos = op - 1 - (((uit) (tbuf & 0xf8) << 5) | *ip++);

          /* If we don't check this, corrupt data makes us segfault (in
             user space) or leaves the process in an uninterruptible
             state (in the kernel). */
          if (m_pos < out)
            return 0;                   /* Compression error. */

          /* if (op + c_len + 2 > out_end)  too many checks
               return 0; */

          *op++ = *m_pos++;
          *op++ = *m_pos++;

          do
            *op++ = *m_pos++;
          while (--c_len);
        }
    }
  while (op < out_end && ip < in_end);

  return op - out;
}
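
/*
 * Example round trip (editorial sketch, not part of the original
 * source; the function and variable names are hypothetical).  It is
 * kept under "#if 0" so the file's compiled contents are unchanged.
 * The +2 bytes of slack after the input mirror the compressor's hash
 * update, which reads up to in[len + 1].
 */
#if 0
#include <stdlib.h>
#include <string.h>

static int
example_roundtrip (const uch *data, int len)
{
  uch *src  = malloc (len + 2);          /* +2: hash update reads past the end */
  uch *dst  = malloc (len);              /* give up unless we actually shrink  */
  uch *back = malloc (len);
  ush *heap = malloc (HSIZ * sizeof (ush));
  int clen, ok = 0;

  if (src && dst && back && heap)
    {
      memcpy (src, data, len);
      src[len] = src[len + 1] = 0;
      memset (heap, 0, HSIZ * sizeof (ush));  /* not required for correctness */

      clen = wLZV1 (src, dst, heap, len, len);
      if (clen)                          /* 0 means "did not fit into len bytes" */
        ok = rLZV1 (dst, back, clen, len) == len
             && memcmp (back, data, len) == 0;
    }

  free (src);
  free (dst);
  free (back);
  free (heap);
  return ok;
}
#endif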