-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsteps.py
50 lines (37 loc) · 1.54 KB
/
steps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import unicodedata
def atayal_to_pseudo_ipa(text):
    """Generate pseudo IPA from Atayal text.

    The conversion runs in three passes:

    1. Map individual orthographic letters to IPA symbols
       (``'`` -> ʔ, ``b`` -> β, ``c`` -> ʦ, ``g`` -> ɣ, ``y`` -> j);
       every other character is copied through unchanged.
    2. Collapse the digraph "ng" (which pass 1 turned into ``n`` + ɣ)
       into the single symbol ŋ.
    3. Insert a schwa (ə) between two directly adjacent consonants.

    For pass 3 a "consonant" is any character that is not one of
    ``a e i o u j`` and not a space, so punctuation and digits are
    treated as consonants too. A space resets the cluster, so no schwa
    is inserted across a word boundary.

    Ref: https://en.wikipedia.org/wiki/Phonetic_symbols_in_Unicode

    Args:
        text: Atayal text in its Latin orthography.

    Returns:
        The pseudo-IPA transcription as a string; an empty input yields
        an empty string.
    """
    def nfkc(symbol):
        # Keep every emitted symbol in NFKC form, as the original code did.
        return unicodedata.normalize('NFKC', symbol)

    letter_to_ipa = {
        "'": nfkc('\u0294'),  # ʔ
        'b': nfkc('\u03B2'),  # β
        'c': nfkc('\u02A6'),  # ʦ
        'g': nfkc('\u0263'),  # ɣ
        'y': nfkc('j'),
    }
    pre_ipa = ''.join(letter_to_ipa.get(char, char) for char in text)

    # "ng" is now "n" + ɣ.  A single replace() rewrites every occurrence,
    # and ŋ can never form a new "n" + ɣ pair, so no repetition is needed.
    pre_ipa = pre_ipa.replace('n' + letter_to_ipa['g'], nfkc('\u014B'))  # ŋ

    schwa = nfkc('\u0259')  # ə, the inserted mid vowel
    phonemes = []
    # Must be initialised before the loop: the text may start with a
    # consonant, and there is no previous character to form a cluster with.
    previous_was_consonant = False
    for char in pre_ipa:
        is_consonant = char not in 'aeiouj' and char != ' '
        if is_consonant and previous_was_consonant:
            phonemes.append(schwa)  # break up the consonant cluster
        phonemes.append(char)
        previous_was_consonant = is_consonant
    return ''.join(phonemes)