-
Notifications
You must be signed in to change notification settings - Fork 8
/
word_error_rate.py
76 lines (62 loc) · 2.16 KB
/
word_error_rate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
@author Kiettiphong Manovisut
References:
https://en.wikipedia.org/wiki/Word_error_rate
https://www.github.com/mission-peace/interview/wiki
"""
import numpy
def get_word_error_rate(r, h):
    """
    Compute the word error rate (WER) of a hypothesis against a reference.

    The WER is the word-level edit distance between ``r`` and ``h`` (the
    minimum number of substitutions, insertions and deletions needed to turn
    the reference into the hypothesis) divided by the number of reference
    words, expressed as a percentage.

    As a side effect the alignment is handed to ``print_to_html`` so that a
    visual diff is produced alongside the number.

    :param r: reference words (a sequence, e.g. a list of strings).
    :param h: hypothesis words (a sequence, e.g. a list of strings).
    :return: the WER as a float percentage. It can exceed 100 when the
        hypothesis needs more edits than there are reference words, and is
        ``0.0`` when both sequences are empty.
    :raises ZeroDivisionError: if ``r`` is empty while ``h`` is not; the rate
        has no denominator in that case.
    """
    ref_len = len(r)
    hyp_len = len(h)
    if ref_len == 0 and hyp_len > 0:
        # Same exception type the plain division used to raise, but raised
        # up front and with an explanation.
        raise ZeroDivisionError(
            "word error rate is undefined for an empty reference")

    # Cell values never exceed max(ref_len, hyp_len), and the candidate costs
    # computed below never exceed that plus one. uint16 is kept for ordinary
    # inputs (same memory footprint as before); longer inputs get uint32 so
    # the counters cannot overflow.
    longest = max(ref_len, hyp_len)
    cell_type = numpy.uint16 if longest < 65535 else numpy.uint32
    d = numpy.zeros((ref_len + 1, hyp_len + 1), dtype=cell_type)

    # Boundary conditions: turning an empty prefix into j words costs j
    # insertions, turning i words into an empty prefix costs i deletions.
    d[0, :] = numpy.arange(hyp_len + 1, dtype=cell_type)
    d[:, 0] = numpy.arange(ref_len + 1, dtype=cell_type)

    for i, ref_word in enumerate(r, 1):
        for j, hyp_word in enumerate(h, 1):
            if ref_word == hyp_word:
                # Matching words cost nothing: carry the diagonal value over.
                d[i][j] = d[i - 1][j - 1]
            else:
                substitution = d[i - 1][j - 1] + 1
                insertion = d[i][j - 1] + 1
                deletion = d[i - 1][j] + 1
                d[i][j] = min(substitution, insertion, deletion)

    if ref_len:
        result = float(d[ref_len][hyp_len]) / ref_len * 100
    else:
        # Both sequences are empty here: nothing to get wrong.
        result = 0.0
    print_to_html(r, h, d)
    return result
def print_to_html(r, h, d, filename="diff.html"):
    """
    Write an HTML report that highlights how the hypothesis differs from the
    reference.

    The alignment is recovered by walking the edit-distance matrix backwards
    from ``d[len(r)][len(h)]`` until both sequences are exhausted. Matching
    words are written plainly; substitutions are wrapped in class ``y`` as
    ``hypothesis(reference)``, deleted reference words in class ``r`` and
    inserted hypothesis words in class ``g``.

    :param r: reference words (a sequence).
    :param h: hypothesis words (a sequence).
    :param d: edit-distance matrix indexable as ``d[i][j]`` with
        ``len(r) + 1`` rows and ``len(h) + 1`` columns.
    :param filename: path of the report to write; defaults to ``diff.html``.
    :return: None. The report is written to ``filename`` and a confirmation
        line is printed.
    """
    from html import escape

    def text(word):
        # Words are arbitrary input: escape them so characters such as "<"
        # or "&" cannot break the markup.
        return escape(str(word))

    x = len(r)
    y = len(h)
    # Fragments are collected from the end of the alignment to its start and
    # reversed once at the end, instead of repeatedly prepending to a string.
    pieces = []
    # Keep going until BOTH indices reach zero; stopping as soon as one of
    # them does would drop insertions/deletions at the start of the texts.
    while x > 0 or y > 0:
        if x > 0 and y > 0 and r[x - 1] == h[y - 1]:
            x = x - 1
            y = y - 1
            pieces.append('%s ' % text(h[y]))
        elif x > 0 and y > 0 and d[x][y] == d[x - 1][y - 1] + 1:  # substitution
            x = x - 1
            y = y - 1
            pieces.append(
                '<span class="y">%s(%s)</span> ' % (text(h[y]), text(r[x])))
        elif x > 0 and d[x][y] == d[x - 1][y] + 1:  # deletion
            x = x - 1
            pieces.append('<span class="r">%s</span> ' % text(r[x]))
        elif y > 0 and d[x][y] == d[x][y - 1] + 1:  # insertion
            y = y - 1
            pieces.append('<span class="g">%s</span> ' % text(h[y]))
        else:
            # Only reachable when d is not a consistent edit-distance matrix
            # for r and h; report it and write what was recovered so far.
            print('\nWe got an error.')
            break

    document = (
        '<html><head><meta charset="utf-8">'
        '<style>.g{background-color:#0080004d}.r{background-color:#ff00004d}.y{background-color:#ffa50099}</style>'
        '</head><body>'
        + ''.join(reversed(pieces))
        + '</body></html>'
    )
    # The markup declares utf-8, so write the file with that encoding rather
    # than the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(document)
    print("Printed comparison to: {0}".format(filename))