-
Notifications
You must be signed in to change notification settings - Fork 33
/
delta_encoding_with_unary_coding.pl
89 lines (67 loc) · 2.02 KB
/
delta_encoding_with_unary_coding.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/perl
# Author: Trizen
# Date: 13 June 2023
# https://github.com/trizen
# Implementation of the Delta encoding scheme, using unary coding.
# Reference:
# Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction
# https://youtube.com/watch?v=-3H_eDbWNEU
use 5.036;
sub delta_encode ($bytes) {
my @deltas;
my $prev = 0;
while (@$bytes) {
my $curr = shift(@$bytes);
push @deltas, $curr - $prev;
$prev = $curr;
}
my $bitstring = '';
foreach my $d (@deltas) {
if ($d == 0) {
$bitstring .= '0';
}
else {
$bitstring .= '1' . (($d < 0) ? '0' : '1') . ('1' x (abs($d) - 1)) . '0';
}
}
return $bitstring;
}
sub delta_decode ($bitstring) {
my @bits = split(//, $bitstring);
my @deltas;
while (@bits) {
my $bit = shift(@bits);
if ($bit eq '0') {
push @deltas, 0;
}
else {
my $bit = shift(@bits);
my $n = 1;
++$n while (shift(@bits) eq '1');
push @deltas, ($bit eq '1' ? $n : -$n);
}
}
my @acc;
my $prev = 0;
foreach my $d (@deltas) {
$prev += $d;
push @acc, $prev;
}
return \@acc;
}
my $str = "TOBEORNOTTOBEORTOBEORNOT";
my $encoded = delta_encode([unpack('C*', $str)]);
my $decoded = delta_decode($encoded);
say "Encoded: ", "$encoded";
say "Decoded: ", pack('C*', @$decoded);
$str eq pack('C*', @$decoded) or die "error";
{
open my $fh, '<:raw', __FILE__;
my $str = do { local $/; <$fh> };
my $encoded = delta_encode([unpack('C*', $str)]);
my $decoded = delta_decode($encoded);
$str eq pack('C*', @$decoded) or die "error";
}
__END__
Encoded: 111111111111111111111111111111111111111111111111111111111111111111111111111111111111101011110101111111111110111101111111111101111010111011011111100101111010111111111111011110111111111110111101110101111010111111111111011110111111111110111101011101101111110
Decoded: TOBEORNOTTOBEORTOBEORNOT