-
Notifications
You must be signed in to change notification settings - Fork 20
/
test_hashes.py
65 lines (56 loc) · 2.46 KB
/
test_hashes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
def assert_is_sorted(lst, key=lambda x: x):
    """Assert that ``lst`` is in non-decreasing order under ``key``.

    Args:
        lst: Sliceable sequence to check.
        key: Function mapping an element to the value that is compared.
            Defaults to the identity function.

    Raises:
        AssertionError: At the first element whose key is smaller than the
            key of the element directly before it.
    """
    # Pair each element with its successor. lst[1:] is one item shorter than
    # lst, so zip stops after the last adjacent pair.
    for previous, current in zip(lst, lst[1:]):
        assert key(current) >= key(previous)
def test_hashes():
    """Validate every row of ParamLabels.csv (pytest entry point).

    Each non-blank line is split at its first comma into a hash and a label.
    The checks are:

    * rows are ordered by label (non-decreasing);
    * the hash text is at least 12 characters long;
    * the hash bits above the low 32 equal the label's UTF-8 byte length;
    * the low 32 bits equal the CRC32 of the label's UTF-8 bytes;
    * no hash value appears twice.

    Raises:
        AssertionError: On the first check that fails.
    """
    from binascii import crc32

    # Decode explicitly as UTF-8, matching main(). Relying on the platform's
    # default encoding could mis-decode non-ASCII labels, which would then
    # fail the length and CRC checks through no fault of the data.
    with open("ParamLabels.csv", mode="r", encoding="utf-8") as f:
        rows = [line.rstrip('\n').split(',', 1) for line in f if not line.isspace()]
    assert_is_sorted(rows, key=lambda row: row[1])

    seen_hashes = set()
    for hash_string, label in rows:
        hash_value = int(hash_string, 16)
        # Encode once; both the length and the CRC are taken over the bytes.
        label_utf8 = label.encode('utf-8')
        assert len(hash_string) >= 12
        # Only the lower 32 bits are the hash (blame arthur), ensure the crc is legit.
        # Everything above those 32 bits must be the label's byte length.
        assert len(label_utf8) == (hash_value >> 32)
        assert crc32(label_utf8) == (hash_value & 0xFFFFFFFF)
        assert hash_value not in seen_hashes
        seen_hashes.add(hash_value)
def main():
    """Print every problem found in ParamLabels.csv.

    Unlike an assert-based check, this keeps going after a failure so that
    all offending rows are listed in one run. Each non-blank line is split at
    its first comma into a hash and a label, and the following are reported:

    * rows not ordered by label;
    * rows with no comma, or whose hash is not valid hexadecimal;
    * hash text shorter than 12 characters;
    * hash bits above the low 32 not equal to the label's UTF-8 byte length;
    * low 32 bits not equal to the CRC32 of the label's UTF-8 bytes;
    * hash values that already appeared on an earlier row.

    If anything was reported, waits for Enter before returning.
    """
    from binascii import crc32

    error_printed = False

    # (file line number, hash text, label) for every row that has both fields.
    rows = []
    with open("ParamLabels.csv", mode="r", encoding="utf-8") as f:
        # Number the lines before dropping blank ones, so reported line
        # numbers match the file even when blank lines precede a row.
        for line_no, raw_line in enumerate(f, start=1):
            if raw_line.isspace():
                continue
            fields = raw_line.rstrip('\n').split(',', 1)
            if len(fields) != 2:
                # Report the row instead of crashing on fields[1] later.
                print(f"line {line_no} has no ',' separating hash and label.")
                error_printed = True
                continue
            rows.append((line_no, fields[0], fields[1]))

    try:
        assert_is_sorted(rows, key=lambda row: row[2])
    except AssertionError:
        print("ParamLabels.csv is not sorted, run remove_duplicates.py to fix")
        error_printed = True

    seen_hashes = set()
    for line_no, hash_string, label in rows:
        try:
            hash_value = int(hash_string, 16)
        except ValueError:
            # Keep reporting the remaining rows rather than aborting here.
            print(f"'{hash_string}', line {line_no} is not a valid hexadecimal number.")
            error_printed = True
            continue
        label_utf8 = label.encode('utf-8')

        if len(hash_string) < 12:
            print(f"'{hash_string}', line {line_no} is not properly padded to 12 chars.")
            error_printed = True
        if len(label_utf8) != (hash_value >> 32):
            print(f"'{hash_string}', line {line_no} specified string length mismatch.")
            error_printed = True
        # Only the lower 32 bits are the hash (blame arthur), ensure the crc is legit
        if crc32(label_utf8) != (hash_value & 0xFFFFFFFF):
            print(f"'{hash_string}', line {line_no} crc32 mismatch.")
            error_printed = True
        if hash_value in seen_hashes:
            print(f"'{hash_string}', line {line_no} hash duplicate.")
            error_printed = True
        seen_hashes.add(hash_value)

    if error_printed:
        # Hold the console open so the messages can be read before exit.
        input("Press Enter to exit...")
# NOTE: pytest is the intended way to run these checks. Executing the file
# directly is only a convenience for printing out the incorrect hashes.
if __name__ == "__main__":
    main()