-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtest_urdu_characters.py
119 lines (95 loc) · 3.72 KB
/
test_urdu_characters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# coding: utf8
""" Test cases """
import csv
import re
import unicodedata
from urdu_characters import (URDU_ALL_CHARACTERS, URDU_ALPHABETS, URDU_DIGITS, URDU_PUNCTUATIONS, URDU_DIACRITICS,
URDU_ALL_CHARACTERS_UNICODE, URDU_EXTRA_CHARACTERS)
# NOTE: despite the name, this is a *negated* character class: it matches runs
# of one or more characters that lie OUTSIDE the U+0600-U+06FF block. The tests
# below therefore assert that it finds nothing in a character.
URDU_UNICODE_RANGE = re.compile("[^\u0600-\u06ff]+")
def test_urdu_alphabet():
    """Check the size, container type and members of URDU_ALPHABETS."""
    assert len(URDU_ALPHABETS) == 46
    assert isinstance(URDU_ALPHABETS, frozenset)
    for letter in URDU_ALPHABETS:
        assert letter in URDU_ALL_CHARACTERS
        assert len(letter) == 1
        # The pattern matches only text outside U+0600-U+06FF, so "no match"
        # means the letter lies inside that range.
        assert URDU_UNICODE_RANGE.search(letter) is None
        assert isinstance(letter, str)
def test_urdu_digits():
    """Check the size, container type and members of URDU_DIGITS."""
    assert len(URDU_DIGITS) == 10
    assert isinstance(URDU_DIGITS, frozenset)
    for digit in URDU_DIGITS:
        assert len(digit) == 1
        assert digit in URDU_ALL_CHARACTERS
        # The pattern matches only text outside U+0600-U+06FF, so "no match"
        # means the digit lies inside that range.
        assert URDU_UNICODE_RANGE.search(digit) is None
        assert isinstance(digit, str)
def test_urdu_punctuation():
    """Check the size, container type and members of URDU_PUNCTUATIONS."""
    assert len(URDU_PUNCTUATIONS) == 6
    assert isinstance(URDU_PUNCTUATIONS, frozenset)
    for mark in URDU_PUNCTUATIONS:
        assert len(mark) == 1
        assert mark in URDU_ALL_CHARACTERS
        # The pattern matches only text outside U+0600-U+06FF, so "no match"
        # means the mark lies inside that range.
        assert URDU_UNICODE_RANGE.search(mark) is None
        assert isinstance(mark, str)
def test_diacritics():
    """Check the size, container type and members of URDU_DIACRITICS."""
    assert len(URDU_DIACRITICS) == 6
    assert isinstance(URDU_DIACRITICS, frozenset)
    for diacritic in URDU_DIACRITICS:
        assert len(diacritic) == 1
        assert diacritic in URDU_ALL_CHARACTERS
        # The pattern matches only text outside U+0600-U+06FF, so "no match"
        # means the diacritic lies inside that range.
        assert URDU_UNICODE_RANGE.search(diacritic) is None
        assert isinstance(diacritic, str)
def test_extras_characters():
    """Check the size, container type and members of URDU_EXTRA_CHARACTERS."""
    assert len(URDU_EXTRA_CHARACTERS) == 22
    assert isinstance(URDU_EXTRA_CHARACTERS, frozenset)
    for extra in URDU_EXTRA_CHARACTERS:
        assert len(extra) == 1
        assert extra in URDU_ALL_CHARACTERS
        # The pattern matches only text outside U+0600-U+06FF, so "no match"
        # means the character lies inside that range.
        assert URDU_UNICODE_RANGE.search(extra) is None
        assert isinstance(extra, str)
def test_unicode():
    """Check that URDU_ALL_CHARACTERS and URDU_ALL_CHARACTERS_UNICODE agree.

    Three passes are made:

    1. every member of URDU_ALL_CHARACTERS is a length-1 ``str`` and a key of
       the mapping;
    2. every key of the mapping has length 1, and every key and value is a
       member of URDU_ALL_CHARACTERS;
    3. every member of URDU_ALL_CHARACTERS occurs among the mapping's values.
    """
    for symbol in URDU_ALL_CHARACTERS:
        assert len(symbol) == 1
        assert symbol in URDU_ALL_CHARACTERS_UNICODE
        assert isinstance(symbol, str)

    for key, mapped in URDU_ALL_CHARACTERS_UNICODE.items():
        assert len(key) == 1
        assert key in URDU_ALL_CHARACTERS
        assert mapped in URDU_ALL_CHARACTERS

    # Collect the mapping's values once so the membership checks are cheap.
    mapped_values = set(URDU_ALL_CHARACTERS_UNICODE.values())
    for symbol in URDU_ALL_CHARACTERS:
        assert len(symbol) == 1
        assert symbol in mapped_values
def test_unicode_norm():
    """Check that NFKD-normalised characters stay inside URDU_ALL_CHARACTERS.

    Each member of URDU_ALL_CHARACTERS is normalised with NFKD and every code
    point of the result must itself be a member of the set. On failure the
    whole normalised string is reported as the assertion message.
    """
    for symbol in URDU_ALL_CHARACTERS:
        # This one character is exempt from the check (presumably because its
        # decomposition contains a code point that is not in the set -- confirm).
        if symbol != "ئ":
            decomposed = unicodedata.normalize('NFKD', symbol)
            for part in decomposed:
                assert part in URDU_ALL_CHARACTERS, decomposed
def test_check_data():
    """Check the container types and sizes of the combined character data.

    URDU_ALL_CHARACTERS must be a ``frozenset`` of 90 members and
    URDU_ALL_CHARACTERS_UNICODE a ``dict`` with the same number of entries.
    """
    assert isinstance(URDU_ALL_CHARACTERS, frozenset)
    assert isinstance(URDU_ALL_CHARACTERS_UNICODE, dict)
    total_characters = len(URDU_ALL_CHARACTERS)
    assert total_characters == 90
    assert len(URDU_ALL_CHARACTERS_UNICODE) == total_characters
def test_urdu_arabic_unicode_comparison():
    """Check the characters listed in the Urdu/Arabic comparison CSV.

    Reads ``img/Urdu_Arabic_Unicode_comparison.csv`` (path relative to the
    current working directory), skips the first two rows (presumably header
    rows -- confirm against the file) and asserts that the fifth column of
    every remaining row is a member of URDU_ALPHABETS.
    """
    # newline="" is what the csv module asks for when it is handed a file
    # object, so that line endings inside quoted fields reach the reader
    # unmodified.
    with open('img/Urdu_Arabic_Unicode_comparison.csv',
              encoding="utf8", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row_number, row in enumerate(csv_reader):
            if row_number < 2:
                continue
            char = row[4]
            # Name the offending row so a failure can be traced to the CSV.
            assert char in URDU_ALPHABETS, (
                "row %d: %r is not in URDU_ALPHABETS" % (row_number, char)
            )