-
Notifications
You must be signed in to change notification settings - Fork 162
/
puff.c
433 lines (381 loc) · 15.9 KB
/
puff.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
/*
* puff.c
* Copyright (C) 2002-2013 Mark Adler
* For conditions of distribution and use, see copyright notice in puff.h
* version 2.3, 21 Jan 2013
*/
#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */
#include "puff.h" /* prototype for puff() */
#define local static /* for local function definitions */
#define MAXBITS 15 /* maximum bits in a code */
#define MAXLCODES 286 /* maximum number of literal/length codes */
#define MAXDCODES 30 /* maximum number of distance codes */
#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */
#define FIXLCODES 288 /* number of fixed literal/length codes */
/* input and output state */
struct state {
/* output state */
unsigned char *out; /* output buffer */
unsigned long outlen; /* available space at out */
unsigned long outcnt; /* bytes written to out so far */
/* input state */
const unsigned char *in; /* input buffer */
unsigned long inlen; /* available input at in */
unsigned long incnt; /* bytes read so far */
int bitbuf; /* bit buffer */
int bitcnt; /* number of bits in bit buffer */
/* input limit error return state for bits() and decode() */
jmp_buf env;
};
local int bits(struct state *s, int need)
{
long val; /* bit accumulator (can use up to 20 bits) */
/* load at least need bits into val */
val = s->bitbuf;
while (s->bitcnt < need) {
if (s->incnt == s->inlen)
longjmp(s->env, 1); /* out of input */
val |= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */
s->bitcnt += 8;
}
/* drop need bits and update buffer, always zero to seven bits left */
s->bitbuf = (int)(val >> need);
s->bitcnt -= need;
/* return need bits, zeroing the bits above that */
return (int)(val & ((1L << need) - 1));
}
local int stored(struct state *s)
{
unsigned len; /* length of stored block */
/* discard leftover bits from current byte (assumes s->bitcnt < 8) */
s->bitbuf = 0;
s->bitcnt = 0;
/* get length and check against its one's complement */
if (s->incnt + 4 > s->inlen)
return 2; /* not enough input */
len = s->in[s->incnt++];
len |= s->in[s->incnt++] << 8;
if (s->in[s->incnt++] != (~len & 0xff) ||
s->in[s->incnt++] != ((~len >> 8) & 0xff))
return -2; /* didn't match complement! */
/* copy len bytes from in to out */
if (s->incnt + len > s->inlen)
return 2; /* not enough input */
if (s->out != NIL) {
if (s->outcnt + len > s->outlen)
return 1; /* not enough output space */
while (len--)
s->out[s->outcnt++] = s->in[s->incnt++];
}
else { /* just scanning */
s->outcnt += len;
s->incnt += len;
}
/* done with a valid stored block */
return 0;
}
struct huffman {
short *count; /* number of symbols of each length */
short *symbol; /* canonically ordered symbols */
};
local int decode(struct state *s, const struct huffman *h)
{
int len; /* current number of bits in code */
int code; /* len bits being decoded */
int first; /* first code of length len */
int count; /* number of codes of length len */
int index; /* index of first code of length len in symbol table */
int bitbuf; /* bits from stream */
int left; /* bits left in next or left to process */
short *next; /* next number of codes */
bitbuf = s->bitbuf;
left = s->bitcnt;
code = first = index = 0;
len = 1;
next = h->count + 1;
while (1) {
while (left--) {
code |= bitbuf & 1;
bitbuf >>= 1;
count = *next++;
if (code - count < first) { /* if length len, return symbol */
s->bitbuf = bitbuf;
s->bitcnt = (s->bitcnt - len) & 7;
return h->symbol[index + (code - first)];
}
index += count; /* else update for next length */
first += count;
first <<= 1;
code <<= 1;
len++;
}
left = (MAXBITS+1) - len;
if (left == 0)
break;
if (s->incnt == s->inlen)
longjmp(s->env, 1); /* out of input */
bitbuf = s->in[s->incnt++];
if (left > 8)
left = 8;
}
return -10; /* ran out of codes */
}
local int construct(struct huffman *h, const short *length, int n)
{
int symbol; /* current symbol when stepping through length[] */
int len; /* current length when stepping through h->count[] */
int left; /* number of possible codes left of current length */
short offs[MAXBITS+1]; /* offsets in symbol table for each length */
/* count number of codes of each length */
for (len = 0; len <= MAXBITS; len++)
h->count[len] = 0;
for (symbol = 0; symbol < n; symbol++)
(h->count[length[symbol]])++; /* assumes lengths are within bounds */
if (h->count[0] == n) /* no codes! */
return 0; /* complete, but decode() will fail */
/* check for an over-subscribed or incomplete set of lengths */
left = 1; /* one possible code of zero length */
for (len = 1; len <= MAXBITS; len++) {
left <<= 1; /* one more bit, double codes left */
left -= h->count[len]; /* deduct count from possible codes */
if (left < 0)
return left; /* over-subscribed--return negative */
} /* left > 0 means incomplete */
/* generate offsets into symbol table for each length for sorting */
offs[1] = 0;
for (len = 1; len < MAXBITS; len++)
offs[len + 1] = offs[len] + h->count[len];
/*
* put symbols in table sorted by length, by symbol order within each
* length
*/
for (symbol = 0; symbol < n; symbol++)
if (length[symbol] != 0)
h->symbol[offs[length[symbol]]++] = symbol;
/* return zero for complete set, positive for incomplete set */
return left;
}
local int codes(struct state *s,
const struct huffman *lencode,
const struct huffman *distcode)
{
int symbol; /* decoded symbol */
int len; /* length for copy */
unsigned dist; /* distance for copy */
static const short lens[29] = { /* Size base for length codes 257..285 */
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
static const short lext[29] = { /* Extra bits for length codes 257..285 */
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
static const short dists[30] = { /* Offset base for distance codes 0..29 */
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
8193, 12289, 16385, 24577};
static const short dext[30] = { /* Extra bits for distance codes 0..29 */
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13};
/* decode literals and length/distance pairs */
do {
symbol = decode(s, lencode);
if (symbol < 0)
return symbol; /* invalid symbol */
if (symbol < 256) { /* literal: symbol is the byte */
/* write out the literal */
if (s->out != NIL) {
if (s->outcnt == s->outlen)
return 1;
s->out[s->outcnt] = symbol;
}
s->outcnt++;
}
else if (symbol > 256) { /* length */
/* get and compute length */
symbol -= 257;
if (symbol >= 29)
return -10; /* invalid fixed code */
len = lens[symbol] + bits(s, lext[symbol]);
/* get and check distance */
symbol = decode(s, distcode);
if (symbol < 0)
return symbol; /* invalid symbol */
dist = dists[symbol] + bits(s, dext[symbol]);
#ifndef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
if (dist > s->outcnt)
return -11; /* distance too far back */
#endif
/* copy length bytes from distance bytes back */
if (s->out != NIL) {
if (s->outcnt + len > s->outlen)
return 1;
while (len--) {
s->out[s->outcnt] =
#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
dist > s->outcnt ?
0 :
#endif
s->out[s->outcnt - dist];
s->outcnt++;
}
}
else
s->outcnt += len;
}
} while (symbol != 256); /* end of block symbol */
/* done with a valid fixed or dynamic block */
return 0;
}
local int fixed(struct state *s)
{
static int virgin = 1;
static short lencnt[MAXBITS+1], lensym[FIXLCODES];
static short distcnt[MAXBITS+1], distsym[MAXDCODES];
static struct huffman lencode, distcode;
/* build fixed huffman tables if first call (may not be thread safe) */
if (virgin) {
int symbol;
short lengths[FIXLCODES];
/* construct lencode and distcode */
lencode.count = lencnt;
lencode.symbol = lensym;
distcode.count = distcnt;
distcode.symbol = distsym;
/* literal/length table */
for (symbol = 0; symbol < 144; symbol++)
lengths[symbol] = 8;
for (; symbol < 256; symbol++)
lengths[symbol] = 9;
for (; symbol < 280; symbol++)
lengths[symbol] = 7;
for (; symbol < FIXLCODES; symbol++)
lengths[symbol] = 8;
construct(&lencode, lengths, FIXLCODES);
/* distance table */
for (symbol = 0; symbol < MAXDCODES; symbol++)
lengths[symbol] = 5;
construct(&distcode, lengths, MAXDCODES);
/* do this just once */
virgin = 0;
}
/* decode data until end-of-block code */
return codes(s, &lencode, &distcode);
}
local int dynamic(struct state *s)
{
int nlen, ndist, ncode; /* number of lengths in descriptor */
int index; /* index of lengths[] */
int err; /* construct() return value */
short lengths[MAXCODES]; /* descriptor code lengths */
short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */
short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */
struct huffman lencode, distcode; /* length and distance codes */
static const short order[19] = /* permutation of code length codes */
{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
/* construct lencode and distcode */
lencode.count = lencnt;
lencode.symbol = lensym;
distcode.count = distcnt;
distcode.symbol = distsym;
/* get number of lengths in each table, check lengths */
nlen = bits(s, 5) + 257;
ndist = bits(s, 5) + 1;
ncode = bits(s, 4) + 4;
if (nlen > MAXLCODES || ndist > MAXDCODES)
return -3; /* bad counts */
/* read code length code lengths (really), missing lengths are zero */
for (index = 0; index < ncode; index++)
lengths[order[index]] = bits(s, 3);
for (; index < 19; index++)
lengths[order[index]] = 0;
/* build huffman table for code lengths codes (use lencode temporarily) */
err = construct(&lencode, lengths, 19);
if (err != 0) /* require complete code set here */
return -4;
/* read length/literal and distance code length tables */
index = 0;
while (index < nlen + ndist) {
int symbol; /* decoded value */
int len; /* last length to repeat */
symbol = decode(s, &lencode);
if (symbol < 0)
return symbol; /* invalid symbol */
if (symbol < 16) /* length in 0..15 */
lengths[index++] = symbol;
else { /* repeat instruction */
len = 0; /* assume repeating zeros */
if (symbol == 16) { /* repeat last length 3..6 times */
if (index == 0)
return -5; /* no last length! */
len = lengths[index - 1]; /* last length */
symbol = 3 + bits(s, 2);
}
else if (symbol == 17) /* repeat zero 3..10 times */
symbol = 3 + bits(s, 3);
else /* == 18, repeat zero 11..138 times */
symbol = 11 + bits(s, 7);
if (index + symbol > nlen + ndist)
return -6; /* too many lengths! */
while (symbol--) /* repeat last or zero symbol times */
lengths[index++] = len;
}
}
/* check for end-of-block code -- there better be one! */
if (lengths[256] == 0)
return -9;
/* build huffman table for literal/length codes */
err = construct(&lencode, lengths, nlen);
if (err && (err < 0 || nlen != lencode.count[0] + lencode.count[1]))
return -7; /* incomplete code ok only for single length 1 code */
/* build huffman table for distance codes */
err = construct(&distcode, lengths + nlen, ndist);
if (err && (err < 0 || ndist != distcode.count[0] + distcode.count[1]))
return -8; /* incomplete code ok only for single length 1 code */
/* decode data until end-of-block code */
return codes(s, &lencode, &distcode);
}
int puff(unsigned long dictlen, // length of custom dictionary
unsigned char *dest, /* pointer to destination pointer */
unsigned long *destlen, /* amount of output space */
const unsigned char *source, /* pointer to source data pointer */
unsigned long *sourcelen) /* amount of input available */
{
struct state s; /* input/output state */
int last, type; /* block information */
int err; /* return value */
/* initialize output state */
s.out = dest;
s.outlen = *destlen; /* ignored if dest is NIL */
s.outcnt = dictlen;
/* initialize input state */
s.in = source;
s.inlen = *sourcelen;
s.incnt = 0;
s.bitbuf = 0;
s.bitcnt = 0;
/* return if bits() or decode() tries to read past available input */
if (setjmp(s.env) != 0) /* if came back here via longjmp() */
err = 2; /* then skip do-loop, return error */
else {
/* process blocks until last block or error */
do {
last = bits(&s, 1); /* one if last block */
type = bits(&s, 2); /* block type 0..3 */
err = type == 0 ?
stored(&s) :
(type == 1 ?
fixed(&s) :
(type == 2 ?
dynamic(&s) :
-1)); /* type == 3, invalid */
if (err != 0)
break; /* return with error */
} while (!last);
}
/* update the lengths and return */
if (err <= 0) {
*destlen = s.outcnt - dictlen;
*sourcelen = s.incnt;
}
return err;
}