forked from mathias-madsen/information_theory
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkraft.py
70 lines (56 loc) · 2.42 KB
/
kraft.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from typing import List
from typing import Tuple
def build_prefix_code(requested_lengths: List[int],
alphabet: Tuple[str] = ("0", "1")) -> List[str]:
""" Construct a binary prefix code of given codeword lengths.
Parameters:
-----------
requested_lengths : list of ints >= 0
A list of requested codeword lengths (repetitions possible).
alphabet : tuple of strings, default ("0", "1")
The set of characters out of which to construct the codewords.
Returns:
--------
codebook : list of strings
A list of codewords of the given codeword lengths, sorted by
increasing order of length.
Raises:
-------
ValueError:
If the list of codeword lengths violate Kraft's inequality.
Notes:
------
This function implements a greedy code construction algorithm which
builds the prefix code shortest-word-first, adding available codewords
as necessary, and removing all possible extensions of a codeword
whenever it is added to the codebook.
"""
# complain if Kraft's inequality is violated:
if sum(len(alphabet) ** (-k) for k in lengths) > 1.0:
raise ValueError("Codeword lengths %r violate Kraft's inequality."
% requested_lengths)
# we step through the tree of binary strings by increasing length;
# whenever we use a string as a codeword, we remove its descendants
# from consideration as future codewords:
allowed_at_current_length = [""]
codebook = []
for current_length in range(max(requested_lengths) + 1):
# add the required number of codewords at this length
# by cutting them out of the list of available prefixes:
num_required = sum(k == current_length for k in requested_lengths)
for _ in range(num_required):
codebook.append(allowed_at_current_length.pop(0))
# extend all remaining prefixes by one more character:
allowed_at_current_length = [p + s for p in allowed_at_current_length
for s in alphabet]
return codebook
if __name__ == "__main__":
lengths = [1, 1]
code = build_prefix_code(lengths)
assert sorted(code) == ["0", "1"]
lengths = [1, 2, 3, 3]
code = build_prefix_code(lengths)
assert sorted([len(w) for w in code]) == lengths
lengths = [0]
code = build_prefix_code(lengths)
assert code == [""]