-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
114 lines (91 loc) · 2.95 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from book import Book
from character import Character
from tabulate import tabulate
def shannon_fano_avg(characters):
    """Return the expected code length, in bits per symbol, of the assigned codes.

    Each code length is weighted by the probability of its symbol, which is
    the quantity that can be compared against the source entropy.  The result
    is normalised by the total probability, so it is correct whether the
    probabilities sum to 1 or are expressed on another scale (e.g. percent).

    Args:
        characters: iterable of objects exposing ``get_probability()`` and
            ``get_code()`` (the code is measured with ``len``).

    Returns:
        The probability-weighted mean code length, or ``0.0`` when there are
        no symbols or their probabilities add up to zero.
    """
    weighted_bits = 0
    total_probability = 0
    for char in characters:
        probability = char.get_probability()
        weighted_bits += probability * len(char.get_code())
        total_probability += probability

    # Empty input (or all-zero weights) has no meaningful average; avoid the
    # ZeroDivisionError instead of crashing the report.
    if not total_probability:
        return 0.0
    return weighted_bits / total_probability
def sum_arr(characters):
    """Return the sum of ``get_probability()`` over ``characters``.

    Args:
        characters: iterable of objects exposing ``get_probability()``.

    Returns:
        The accumulated probability; ``0`` for an empty iterable.
    """
    # Accumulate explicitly, left to right, rather than calling the builtin
    # ``sum``: newer CPython versions use compensated summation for floats,
    # which could change the last bit of the result and therefore the
    # ``<=`` comparisons callers make between two partial sums.
    total = 0
    for char in characters:
        total += char.get_probability()
    return total
def print_characters(characters, characters_dict):
    """Print one table row per character using ``tabulate``.

    Args:
        characters: iterable of objects exposing ``get_name()``,
            ``get_probability()`` and ``get_code()``.  ``get_name()`` must
            return a single-character string because it is passed to ``ord``.
        characters_dict: mapping indexed by ``get_name()``; the looked-up
            value is shown in the "OCURRENCIA" column.
    """
    column_titles = [
        "CARACTER",
        "UNICODE",
        "OCURRENCIA",
        "PROBABILIDAD RELATIVA",
        "COMPRENSIÓN",
    ]
    # Row layout mirrors the header order above.
    table_rows = [
        [
            entry.get_name(),
            ord(entry.get_name()),
            characters_dict[entry.get_name()],
            entry.get_probability(),
            entry.get_code(),
        ]
        for entry in characters
    ]
    print(tabulate(table_rows, headers=column_titles))
def shannon_fano(characters):
    """Assign Shannon-Fano code bits to ``characters`` in place.

    The group is split in two by walking inwards from both ends: the next
    symbol from the front joins the left half (bit ``"0"``) while the left
    half's accumulated probability does not exceed the right half's,
    otherwise the next symbol from the back joins the right half (bit
    ``"1"``).  Each half is then ordered by descending probability and split
    recursively.

    Args:
        characters: list of objects exposing ``get_probability()`` and
            ``append_code(bit)``.  Presumably ordered by descending
            probability by the caller; the function does not check this.

    Returns:
        None.  Codes are built up through ``append_code``.  A group of one
        symbol (or none) is left untouched.
    """
    # A lone symbol is a leaf; an empty group has nothing to encode.
    # Checking only ``== 1`` would recurse forever on an empty list.
    if len(characters) <= 1:
        return

    left = []
    right = []
    # Running totals replace re-summing both halves on every iteration; the
    # additions happen in the same order, so the comparisons are unchanged.
    left_total = 0
    right_total = 0
    start = 0
    end = len(characters) - 1
    while start <= end:
        # Guarantee both halves are non-empty so each recursive call works on
        # a strictly smaller group (otherwise e.g. all-zero probabilities
        # would put every symbol on the left and never terminate).
        must_go_right = start == end and not right
        if left_total <= right_total and not must_go_right:
            symbol = characters[start]
            symbol.append_code("0")
            left.append(symbol)
            left_total += symbol.get_probability()
            start += 1
        else:
            symbol = characters[end]
            symbol.append_code("1")
            right.append(symbol)
            right_total += symbol.get_probability()
            end -= 1

    # Ascending stable sort followed by reverse() is kept on purpose: it
    # orders equal-probability symbols differently from ``reverse=True``,
    # and that order decides which codes tied symbols receive.
    left = sorted(left, key=lambda char: char.get_probability())
    right = sorted(right, key=lambda char: char.get_probability())
    left.reverse()
    right.reverse()
    shannon_fano(left)
    shannon_fano(right)
def main():
    """Read the book text, build Shannon-Fano codes and print a report.

    Steps:
      1. Read ``el_quijote_v2.txt`` fully into memory.
      2. Count the occurrences of every character in the ``Book``'s
         character mapping.
      3. Let the ``Book`` compute and sort the relative probabilities.
      4. Wrap each (character, probability) pair in a ``Character`` and run
         ``shannon_fano`` over the list.
      5. Print the file name, character count, entropy figures, total
         probability and the per-character table.
    """
    # The context manager closes the file even if reading fails.
    # NOTE(review): no explicit encoding is given, so the platform default
    # is used -- confirm the file's encoding and pass ``encoding=`` if needed.
    with open("el_quijote_v2.txt", "r") as quijote:
        quijote_data = quijote.read()
    book = Book(quijote.name, len(quijote_data))

    # The mapping returned by the book is updated in place, so fetch it once
    # instead of on every character.
    occurrences = book.get_characters()
    for char in quijote_data:
        if char not in occurrences:
            occurrences[char] = 1
        else:
            occurrences[char] += 1

    book.char_relative_prob()
    sorted_probs = book.sort_char_rel_prob()

    print(f"\nBook file name: {book.get_name()}")
    print(f"\nBook total character count: {book.get_char_count()}")
    print("\nEntropy (theoretical):", book.calc_entropy())

    characters = [
        Character(char, probability)
        for char, probability in sorted_probs.items()
    ]
    shannon_fano(characters)

    print("\nEntropy (Shannon Fano):", shannon_fano_avg(characters))
    print("\nTotal probability:", book.total_char_prob())
    print("\nShannon Fano Results: \n")
    print_characters(characters, book.get_characters())
# character1 = Character('A', 0.22)
# character2 = Character('B', 0.28)
# character3 = Character('C', 0.15)
# character4 = Character('D', 0.30)
# character5 = Character('E', 0.05)
# test = []
# test.append(character1)
# test.append(character2)
# test.append(character3)
# test.append(character4)
# test.append(character5)
# test = sorted(test, key=lambda char : char.get_probability())
# test.reverse()
# shannon_fano(test)
# print_characters(test)
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()