Skip to content

Commit

Permalink
Corrected ordering of rules
Browse files Browse the repository at this point in the history
  • Loading branch information
dmort27 committed Jul 11, 2019
1 parent f3c7ce3 commit 2aa4353
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 9 deletions.
20 changes: 20 additions & 0 deletions epitran/data/map/find_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python3

import csv
import sys
from collections import defaultdict

def main(fn):
    """Print the orthographic keys that appear more than once in a map CSV.

    The file is read as UTF-8 CSV. The first row is treated as a header
    and discarded; every following non-blank row must have exactly two
    columns, the orthographic form and its phonetic form.

    The collected mapping is printed first, followed by one line for each
    orthographic form that has more than one phonetic form.

    Args:
        fn: Path of the CSV file to check.

    Raises:
        ValueError: If a non-blank data row does not have exactly two
            columns. The message names the file and the line number.
    """
    mappings = defaultdict(list)
    # The csv module asks for newline='' so that it can handle line endings
    # (including newlines embedded in quoted fields) itself.
    with open(fn, encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing
                # empty line); these carry no mapping.
                continue
            if len(row) != 2:
                raise ValueError(
                    '{}:{}: expected 2 columns, got {}'.format(
                        fn, reader.line_num, len(row)))
            orth, phon = row
            mappings[orth].append(phon)
    print(mappings)
    for orth, phons in mappings.items():
        if len(phons) > 1:
            print(orth)

if __name__ == '__main__':
    # Exit with a usage message instead of an IndexError traceback when the
    # CSV path is missing. Extra arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit('usage: {} MAPPING.csv'.format(sys.argv[0]))
    main(sys.argv[1])
12 changes: 6 additions & 6 deletions epitran/data/rules/pinyin-to-ipa.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
ng -> ŋ / _
b -> p / _
d -> t / _
g -> k / _
p -> pʰ / _
t -> tʰ / _
k -> kʰ / _
zh -> t͡ʂ / _
b -> p / _
d -> t / _
g -> k / _
zh -> ʈ͡ʂ / _
z -> t͡s / _
j -> t͡ɕ / _
ch -> t͡ʂʰ / _
ch -> ʈ͡ʂʰ / _
c -> t͡sʰ / _
q -> t͡ɕʰ / _
m -> m / _
Expand All @@ -20,6 +20,7 @@ x -> ɕ / _
h -> x / _
l -> l / _
r -> ɻ / _
eɻ -> ɻ̩ / _
w -> 0 / _ u
w -> w / _
y -> 0 / _ i
Expand All @@ -30,7 +31,6 @@ i -> ɻ̩ / (t͡ʂ|t͡ʂʰ|ʂ) _
a -> a / _
o -> uo / (p|pʰ|f|m) _
o -> o / _
eɻ -> ɻ̩ / _
e -> e / _
i -> i / _
u[:] -> y / _
Expand Down
2 changes: 2 additions & 0 deletions epitran/xsampa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
unicode_literals)

import os.path
import unicodedata

import pkg_resources

Expand Down Expand Up @@ -54,6 +55,7 @@ def ipa2xs(self, ipa):
Non-IPA segments are skipped.
"""
xsampa = []
ipa = unicodedata.normalize('NFD', ipa)
while ipa:
token = self.longest_prefix(ipa)
if token:
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from setuptools import setup

setup(name='epitran',
version='0.60',
version='0.61',
description='Tools for transcribing languages into IPA.',
url='http://github.com/dmort27/epitran',
download_url='https://github.com/dmort27/epitran/archive/0.60.tar.gz',
download_url='https://github.com/dmort27/epitran/archive/0.61.tar.gz',
author='David R. Mortensen',
author_email='[email protected]',
license='MIT',
install_requires=['setuptools',
'unicodecsv',
'regex',
'panphon>=0.12',
'panphon>=0.15',
'marisa_trie'],
extras_require={':python_version<"3.0"': ['subprocess32']},
scripts=['epitran/bin/epitranscribe.py',
Expand Down

0 comments on commit 2aa4353

Please sign in to comment.