-
Notifications
You must be signed in to change notification settings - Fork 33
/
compress.pl
90 lines (66 loc) · 2.12 KB
/
compress.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/perl
# Author: Trizen
# Date: 05 May 2023
# https://github.com/trizen
# A basic implementation of the UNIX `compress` tool, creating a .Z compressed file, using LZW compression.
# This implementation reads from STDIN and outputs to STDOUT:
# perl compress.pl < input.txt > output.Z
# Reference:
# Data Compression (Summer 2023) - Lecture 4 - The Unix 'compress' Program
# https://youtube.com/watch?v=1cJL9Va80Pk
# See also:
# https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
use 5.036;
use constant {
BUFFER_SIZE => 8 * 512, # must be a multiple of 8
MAGIC_SIGNATURE => "\x1f\x9d\x90",
};
sub compress ($in_fh, $out_fh) {
binmode($in_fh, ':raw');
binmode($out_fh, ':raw');
print {$out_fh} MAGIC_SIGNATURE;
my $dict_size = 256;
my %dictionary = (map { (chr($_), $_) } 0 .. $dict_size - 1);
++$dict_size; # 256 is the 'RESET' marker
my $num_bits = 9;
my $max_bits = 16;
my $max_bits_size = (1 << $num_bits);
my $max_dict_size = (1 << $max_bits);
my $bitstream = '';
my $bitstream_size = 0;
my sub output_index ($symbol) {
$bitstream .= reverse(sprintf('%0*b', $num_bits, $dictionary{$symbol}));
$bitstream_size += $num_bits;
if ($bitstream_size % BUFFER_SIZE == 0) {
print {$out_fh} pack("b*", $bitstream);
$bitstream = '';
$bitstream_size = 0;
}
}
my $w = '';
while (defined(my $c = getc($in_fh))) {
my $wc = $w . $c;
if (exists($dictionary{$wc})) {
$w = $wc;
}
else {
output_index($w);
if ($dict_size < $max_dict_size) {
$dictionary{$wc} = $dict_size++;
if ($dict_size > $max_bits_size) {
++$num_bits;
$max_bits_size <<= 1;
}
}
$w = $c;
}
}
if ($w ne '') {
output_index($w);
}
if ($bitstream ne '') {
print {$out_fh} pack('b*', $bitstream);
}
return 1;
}
compress(\*STDIN, \*STDOUT);