-
Notifications
You must be signed in to change notification settings - Fork 0
/
compress.sf
84 lines (62 loc) · 1.92 KB
/
compress.sf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/ruby
# A basic implementation of the UNIX `compress` tool, creating a .Z compressed file, using LZW compression.
# This implementation reads from STDIN and outputs to STDOUT:
# sidef compress.sf < input.txt > output.Z
# Reference:
# Data Compression (Summer 2023) - Lecture 4 - The Unix 'compress' Program
# https://youtube.com/watch?v=1cJL9Va80Pk
# See also:
# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
# Number of buffered bits at which the pending output is flushed.
# Flushing happens only when the buffered bit count is an exact multiple of
# this value, so it must itself be a multiple of 8 (whole bytes only).
define BUFFER_SIZE = 8*512

# Bytes written at the very start of the output: the two-byte `compress`
# magic number (0x1f 0x9d) followed by the flags byte 0x90, which in the .Z
# format means "block mode, codes of at most 16 bits".
define MAGIC_SIGNATURE = "\x1f\x9d\x90"
# Compress everything readable from `in_fh` into `out_fh`, producing a stream
# in the UNIX `compress` (.Z) format.
#
# Both handles are switched to raw mode and MAGIC_SIGNATURE is written first.
# The input is then LZW-encoded one character at a time. Codes start out 9 bits
# wide and grow by one bit each time the dictionary outgrows the current width,
# up to 16 bits. Once the dictionary is full it simply stops growing; the
# reserved 'RESET' code (256) is never emitted.
#
# Always returns true.
func compress (FileHandle in_fh, FileHandle out_fh) {

    in_fh.binmode(':raw')
    out_fh.binmode(':raw')

    out_fh.print(MAGIC_SIGNATURE)

    # Seed the dictionary with every single-byte string, keyed to its byte value.
    var code_of = Hash(256.of {|i| (i.chr, i) }...)

    # Code 256 is the 'RESET' marker, so the first assignable code is 257.
    var next_code = 257

    var width_limit = 16    # widest code ever emitted
    var code_width  = 9     # width of the codes currently being emitted

    var code_limit      = (1 << width_limit)    # dictionary stops growing at this size
    var width_threshold = (1 << code_width)     # widen the codes once next_code exceeds this

    var pending_bits = []   # bit strings queued for output
    var pending_len  = 0    # total number of bits held in pending_bits

    # Pack the queued bits into bytes, write them out and start a fresh queue.
    var flush_pending = {
        out_fh.print(pack('b*', pending_bits.join))
        pending_bits = []
        pending_len  = 0
    }

    # Queue the code assigned to `phrase`, using the current code width, and
    # flush whenever the queued bit count lands on a multiple of BUFFER_SIZE.
    var emit_code = {|phrase|

        # The zero-padded binary string is reversed so that the least
        # significant bit comes first, which is the order 'b*' packs bits in.
        var bits = ('%0*b' % (code_width, code_of{phrase}))

        pending_bits.push(bits.flip)
        pending_len += code_width

        if (pending_len % BUFFER_SIZE == 0) {
            flush_pending.run
        }
    }

    # Longest dictionary phrase matched so far.
    var prefix = ''

    in_fh.each_char {|ch|

        var candidate = prefix+ch

        if (code_of.has(candidate)) {
            # Still a known phrase: keep extending the match.
            prefix = candidate
        }
        else {
            # The match ended: emit it, then try to learn the longer phrase.
            emit_code.run(prefix)

            if (next_code < code_limit) {
                code_of{candidate} = next_code
                ++next_code

                # The dictionary outgrew the current width: use one more bit.
                if (next_code > width_threshold) {
                    ++code_width
                    width_threshold *= 2
                }
            }

            prefix = ch
        }
    }

    # Emit the code for whatever phrase was still being matched at end of input.
    if (prefix != '') {
        emit_code.run(prefix)
    }

    # Write out any bits that never reached a flush boundary.
    if (pending_bits.len > 0) {
        flush_pending.run
    }

    return true
}
# Run as a filter: compress everything read from STDIN and write the result to STDOUT.
compress(STDIN, STDOUT)