From e987d33ec403d5e242ffceaf28410fe0cab9c18c Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Fri, 9 Nov 2018 01:31:50 +1100 Subject: [PATCH 1/7] Add WuyixingSyllable --- Makefile | 2 + pyhokchew/models/concurrent/matsu.py | 107 +++++++++++++++++++++++++ pyhokchew/models/yngping/YngPingTwo.py | 2 + pyhokchew/tests/models/WuyixingTest.py | 42 ++++++++++ pyhokchew/tests/models/__init__.py | 0 5 files changed, 153 insertions(+) create mode 100644 Makefile create mode 100644 pyhokchew/models/concurrent/matsu.py create mode 100644 pyhokchew/tests/models/WuyixingTest.py create mode 100644 pyhokchew/tests/models/__init__.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4f38365 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +test: + python3 -m unittest discover -v . "*Test.py" \ No newline at end of file diff --git a/pyhokchew/models/concurrent/matsu.py b/pyhokchew/models/concurrent/matsu.py new file mode 100644 index 0000000..ca3bb65 --- /dev/null +++ b/pyhokchew/models/concurrent/matsu.py @@ -0,0 +1,107 @@ + +from ...utils import normalise + +WYX_CONSONANTS = ['p', 'ph', 'm', 't', 'th', 'n', 'l', 'ts', 'tsh', 's', 'k', 'kh', 'ng', 'h'] + +WYX_VOWELS = ['a', 'o', 'e', 'oe', 'i', 'u', 'y'] + +WYX_CODAS = ['ng', 'h', 'k', ''] + +WYX_INITIALS = WYX_CONSONANTS + [''] + + +# Tones +# 55 +# 53 s +# 33 f +# 212 v +# 242 x +# 23 z +# 5 + +class WuyixingSyllable: + """烏衣行馬祖話輸入法的拼音方案 + """ + + def __init__(self, initial, final, tone): + """ + """ + if initial not in set(WYX_INITIALS): + raise Exception('Unknown initial: %s' % initial) + self.initial = initial + self.final = final + self.tone = tone + + def __str__(self): + return 'WuyixingSyllable Initial=%s Final=%s Tone=%s' % (self.initial, self.final, self.tone) + + def __repr__(self): + return self.__str__() + + @classmethod + def from_string(cls, s): + s = normalise(s).strip().lower() + + # 需要先 match 較長的 + sorted_initials = sorted(WYX_INITIALS, reverse=True, key=lambda x: len(x)) + initial = None + for i in sorted_initials: + if s.startswith(i): + initial = i + s = s[len(i):] + break + + if initial is None: + raise Exception('No matching initial') + + rime = None + sorted_vowels = sorted(WYX_VOWELS, reverse=True, key=lambda x: len(x)) + + while True: + found = False + for v in sorted_vowels: + if s.startswith(v): + rime = v if rime is None else rime + v + s = s[len(v):] + found = True + break + if not found: + break + + if rime is None: + raise Exception('No matching final') + + coda = None + + for c in WYX_CODAS: + if s.startswith(c): + coda = c + s = s[len(c):] + break + + if coda is None: + raise Exception('No matching final') + + # the rest is the tone + tone = None + if s == "": + if coda in ['h', 'k']: + tone = "5" + else: + tone = "55" + elif s == "s": + tone = "53" + elif s == "f": + tone = "33" + elif s == "v": + tone = "212" + elif s == "x": + tone = "242" + elif s == "z": + if coda in ['h', 'k']: + tone = "23" + + if tone is None: + raise Exception('No matching tone.') + + return WuyixingSyllable(initial, rime+coda, tone) diff --git a/pyhokchew/models/yngping/YngPingTwo.py b/pyhokchew/models/yngping/YngPingTwo.py index c9ecd44..dca131f 100644 --- a/pyhokchew/models/yngping/YngPingTwo.py +++ b/pyhokchew/models/yngping/YngPingTwo.py @@ -125,6 +125,8 @@ def to_handwritten(self): return normalise(self.initial + rime + self.coda) def to_typing(self): + """轉爲鍵入方案 + """ return self.initial + self.rime + self.coda + self.tone def __str__(self): diff --git a/pyhokchew/tests/models/WuyixingTest.py b/pyhokchew/tests/models/WuyixingTest.py new file mode 100644 index 0000000..27297f2 --- /dev/null +++ b/pyhokchew/tests/models/WuyixingTest.py @@ -0,0 +1,42 @@ +import unittest +from ...models.concurrent.matsu import WuyixingSyllable +from ...utils import normalise + +class WuyixingParseTestCase(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_parsing(self): + """解析烏衣行音節 + """ + for s, expected in self.PARSE_TEST_CASES: + with self.subTest(msg="解析烏衣行音節 s" , s=s): + syllable = WuyixingSyllable.from_string(s) + expectedInitial, expectedFinal, expectedTone = expected + self.assertEqual(syllable.initial, expectedInitial) + self.assertEqual(syllable.final, expectedFinal) + self.assertEqual(syllable.tone, expectedTone) + + PARSE_TEST_CASES = [ + ('houv',('h','ou','212')), + ('houx',('h','ou','242')), + ('huangs',('h','uang','53')), + ('huong',('h','uong','55')), + ('hus',('h','u','53')), + ('paukz',('p','auk','23')), + ('phiakz',('ph','iak','23')), + ('phouh',('ph','ouh','5')), + ('pok',('p','ok','5')), + ('pongf',('p','ong','33')), + ('poungv',('p','oung','212')), + ('poungx',('p','oung','242')), + ('thiekz',('th','iek','23')), + ('tsaikz',('ts','aik','23')), + ('tshaikz',('tsh','aik','23')), + ('oe',('','oe','55')), + ] + + diff --git a/pyhokchew/tests/models/__init__.py b/pyhokchew/tests/models/__init__.py new file mode 100644 index 0000000..e69de29 From 01c36714b1d5a035fec807b3d6fb7a0b1c9ca4cd Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Fri, 9 Nov 2018 05:18:09 +1100 Subject: [PATCH 2/7] add MinjiangSyllable --- pyhokchew/models/concurrent/minjiang.py | 109 ++++++++++++++++++++++++ pyhokchew/tests/models/MinjiangTest.py | 32 +++++++ 2 files changed, 141 insertions(+) create mode 100644 pyhokchew/models/concurrent/minjiang.py create mode 100644 pyhokchew/tests/models/MinjiangTest.py diff --git a/pyhokchew/models/concurrent/minjiang.py b/pyhokchew/models/concurrent/minjiang.py new file mode 100644 index 0000000..5cb7584 --- /dev/null +++ b/pyhokchew/models/concurrent/minjiang.py @@ -0,0 +1,109 @@ +from ...utils import normalise + +MJ_CONSONANTS = ['b', 'p', 'm', 'd', 't', 'n', 'l', 'z', 'c', 's', 'g', 'k', 'ng', 'h'] + +MJ_VOWELS = ['a', 'o', 'e', 'ë', 'i', 'u', 'ü'] + +MJ_CODAS = ['ng', 'h', 'k', ''] + +MJ_INITIALS = MJ_CONSONANTS + [''] + +TONE_MAPPINGS = { + "1": "55", + "2": "53", + "3": "33", + "4": "212", + "5": "242", + "6": "23", + "7": "5" +} + +MJ_TONES = list(TONE_MAPPINGS.keys()) + +def normalise_mj(s): + # 正規化閩江學院的奇怪字符 + s = normalise(s).strip().lower() + s = s.replace('ɑ','a').replace('ê','e').replace('ɡ','g') + return s + +# Tones +# 55 1 +# 53 2 +# 33 3 +# 212 4 +# 242 5 +# 23 6 +# 5 7 + +class MinjiangSyllable: + """閩江學院的拼音方案 + """ + + def __init__(self, initial, final, tone): + """ + """ + if initial not in set(MJ_INITIALS): + raise Exception('Unknown initial: %s' % initial) + self.initial = initial + self.final = final + self.tone = tone + + def __str__(self): + return 'MinjiangSyllable Initial=%s Final=%s Tone=%s' % (self.initial, self.final, self.tone) + + def __repr__(self): + return self.__str__() + + @classmethod + def from_string(cls, s): + s = normalise_mj(s) + + # 需要先 match 較長的 + sorted_initials = sorted(MJ_INITIALS, reverse=True, key=lambda x: len(x)) + initial = None + for i in sorted_initials: + if s.startswith(i): + initial = i + s = s[len(i):] + break + + if initial is None: + raise Exception('No matching initial') + + rime = None + sorted_vowels = sorted(MJ_VOWELS, reverse=True, key=lambda x: len(x)) + + while True: + found = False + for v in sorted_vowels: + if s.startswith(v): + rime = v if rime is None else rime + v + s = s[len(v):] + found = True + break + if not found: + break + + if rime is None: + raise Exception('No matching final') + + coda = None + + for c in MJ_CODAS: + if s.startswith(c): + coda = c + s = s[len(c):] + break + + if coda is None: + raise Exception('No matching final') + + # the rest is the tone + tone = None + if s in MJ_TONES: + tone = TONE_MAPPINGS[s] + + if tone is None: + raise Exception('No matching tone.') + + return MinjiangSyllable(initial, rime+coda, tone) diff --git a/pyhokchew/tests/models/MinjiangTest.py b/pyhokchew/tests/models/MinjiangTest.py new file mode 100644 index 0000000..99b61d6 --- /dev/null +++ b/pyhokchew/tests/models/MinjiangTest.py @@ -0,0 +1,32 @@ +import unittest +from ...models.concurrent.minjiang import MinjiangSyllable +from ...utils import normalise + +class MinjiangParseTestCase(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_parsing(self): + """解析閩江學院的音節 + """ + for s, expected in self.PARSE_TEST_CASES: + with self.subTest(msg="解析烏閩江學院的音節 %s" % s): + syllable = MinjiangSyllable.from_string(s) + expectedInitial, expectedFinal, expectedTone = expected + self.assertEqual(syllable.initial, expectedInitial) + self.assertEqual(syllable.final, expectedFinal) + self.assertEqual(syllable.tone, expectedTone) + + PARSE_TEST_CASES = [ + ('huɑnɡ1',('h','uang','55')), + ('goüng5',('g','oüng','242')), + ('gëü5',('g','ëü','242')), + ('huak7',('h','uak','5')), + ('kê1',('k','e','55')), + ('ieng4',('','ieng','212')) + ] + + From c2bed2b2bcf068707621e230ee4def0440f8ea48 Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Fri, 9 Nov 2018 06:51:48 +1100 Subject: [PATCH 3/7] =?UTF-8?q?=E9=96=A9=E6=B1=9F+=E7=83=8F=E8=A1=A3?= =?UTF-8?q?=E8=A1=8C=E8=BD=89=E6=A6=95=E6=8B=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyhokchew/convert.py | 57 +++++++++++++++++++++++++- pyhokchew/tests/ConversionTest.py | 41 ++++++++++++++++++ pyhokchew/tests/models/MinjiangTest.py | 2 +- 3 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 pyhokchew/tests/ConversionTest.py diff --git a/pyhokchew/convert.py b/pyhokchew/convert.py index 0f31ad4..98b8114 100644 --- a/pyhokchew/convert.py +++ b/pyhokchew/convert.py @@ -1,6 +1,8 @@ from .models.CikLinSyllable import CikLinSyllable from .models.FoochowRomanized import FoochowRomanizedSyllable - +from .models.concurrent.matsu import WuyixingSyllable +from .models.concurrent.minjiang import MinjiangSyllable +from .models.yngping.YngPingTwo import YngPingSyllable def foochow_romanized_to_ciklin(f): ciklin_syllable = CikLinSyllable(f.initial,f.final,f.tone) @@ -18,3 +20,56 @@ def ciklin_to_foochow_romanized_string(cInitial, cFinal, cTone): :return: The converted Foochow Romanized string. """ return ciklin_to_foochow_romanized(CikLinSyllable.from_ciklin_string(cInitial+cFinal,cTone)).get_string() + + +def wuyixing_to_yngping(s): + """從烏衣行轉換爲榕拼方案 + """ + wyx = WuyixingSyllable.from_string(s) + INITIAL_MAPPING = { + 'p': 'b', + 'ph':'p', + 't' :'d', + 'th':'t', + 'ts': 'z', + 'tsh': 'c', + 'k':'g', + 'kh':'k', + } + + FINAL_MAPPING = { + # TODO: 榕拼無? + 'ieu': 'iu' + } + + return YngPingSyllable( + INITIAL_MAPPING[wyx.initial] if wyx.initial in INITIAL_MAPPING.keys() else wyx.initial, + FINAL_MAPPING[wyx.final] if wyx.final in FINAL_MAPPING.keys() else wyx.final, + wyx.tone).to_typing() + + +def minjiang_to_yngping(s): + """從閩江學院轉換爲榕拼方案 + """ + mj = MinjiangSyllable.from_string(s) + + + FINAL_MAPPING = { + # TODO: 榕拼無? + 'ieu': 'iu' + } + + VOWEL_MAPPING = { + 'ë': 'oe', + 'ü': 'y' + } + + final = mj.final + for v in VOWEL_MAPPING.keys(): + nv = VOWEL_MAPPING[v] + final = final.replace(v,nv) + + return YngPingSyllable( + mj.initial, + final, + mj.tone).to_typing() \ No newline at end of file diff --git a/pyhokchew/tests/ConversionTest.py b/pyhokchew/tests/ConversionTest.py new file mode 100644 index 0000000..1df3ef3 --- /dev/null +++ b/pyhokchew/tests/ConversionTest.py @@ -0,0 +1,41 @@ +import unittest +from ..utils import normalise, denormalise +from ..convert import wuyixing_to_yngping, minjiang_to_yngping + +class ConversionTestCase(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_wuyixing_to_yngping(self): + """測試烏衣行轉榕拼 + """ + + TESTS = [ + ("kieuf", "giu33"), + ("khov", "ko212"), + ("eingx", "eing242"), + ('khoeyng','koeyng55'), + ('ngiuf','ngiu33') + ] + + for wyx, yngping in TESTS: + with self.subTest(msg="測試烏衣行轉榕拼 %s => %s" % (wyx, yngping)): + self.assertEqual(yngping, wuyixing_to_yngping(wyx)) + + def test_minjiang_to_yngping(self): + """測試閩江學院轉榕拼 + """ + + TESTS = [ + ('lëünɡ2','loeyng53'), + ('ɑnɡ4','ang212'), + ('hünɡ1','hyng55'), + ('ɑi1', 'ai55') + ] + + for mj, yngping in TESTS: + with self.subTest(msg="測試閩江學院轉榕拼 %s => %s" % (mj, yngping)): + self.assertEqual(yngping, minjiang_to_yngping(mj)) \ No newline at end of file diff --git a/pyhokchew/tests/models/MinjiangTest.py b/pyhokchew/tests/models/MinjiangTest.py index 99b61d6..4fc62a4 100644 --- a/pyhokchew/tests/models/MinjiangTest.py +++ b/pyhokchew/tests/models/MinjiangTest.py @@ -13,7 +13,7 @@ def test_parsing(self): """解析閩江學院的音節 """ for s, expected in self.PARSE_TEST_CASES: - with self.subTest(msg="解析烏閩江學院的音節 %s" % s): + with self.subTest(msg="解析閩江學院的音節 %s" % s): syllable = MinjiangSyllable.from_string(s) expectedInitial, expectedFinal, expectedTone = expected self.assertEqual(syllable.initial, expectedInitial) From 8388aff66583a48c633892d724cd9a0ac0fdf712 Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Fri, 9 Nov 2018 06:58:19 +1100 Subject: [PATCH 4/7] moving files around --- pyhokchew/convert.py | 4 ++-- pyhokchew/models/{ => historical}/CikLinSyllable.py | 0 pyhokchew/models/{ => historical}/FoochowRomanized.py | 2 +- pyhokchew/models/historical/__init__.py | 0 pyhokchew/parser.py | 4 ++-- pyhokchew/tests/ParserTest.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) rename pyhokchew/models/{ => historical}/CikLinSyllable.py (100%) rename pyhokchew/models/{ => historical}/FoochowRomanized.py (99%) create mode 100644 pyhokchew/models/historical/__init__.py diff --git a/pyhokchew/convert.py b/pyhokchew/convert.py index 98b8114..5c893f6 100644 --- a/pyhokchew/convert.py +++ b/pyhokchew/convert.py @@ -1,5 +1,5 @@ -from .models.CikLinSyllable import CikLinSyllable -from .models.FoochowRomanized import FoochowRomanizedSyllable +from .models.historical.CikLinSyllable import CikLinSyllable +from .models.historical.FoochowRomanized import FoochowRomanizedSyllable from .models.concurrent.matsu import WuyixingSyllable from .models.concurrent.minjiang import MinjiangSyllable from .models.yngping.YngPingTwo import YngPingSyllable diff --git a/pyhokchew/models/CikLinSyllable.py b/pyhokchew/models/historical/CikLinSyllable.py similarity index 100% rename from pyhokchew/models/CikLinSyllable.py rename to pyhokchew/models/historical/CikLinSyllable.py diff --git a/pyhokchew/models/FoochowRomanized.py b/pyhokchew/models/historical/FoochowRomanized.py similarity index 99% rename from pyhokchew/models/FoochowRomanized.py rename to pyhokchew/models/historical/FoochowRomanized.py index 364f474..624ceb5 100644 --- a/pyhokchew/models/FoochowRomanized.py +++ b/pyhokchew/models/historical/FoochowRomanized.py @@ -1,4 +1,4 @@ -from ..utils import normalise +from ...utils import normalise # All possible initials of Foochow Romanized # 所有可能的福州話羅馬字聲母 diff --git a/pyhokchew/models/historical/__init__.py b/pyhokchew/models/historical/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyhokchew/parser.py b/pyhokchew/parser.py index 49ddfa9..da24693 100644 --- a/pyhokchew/parser.py +++ b/pyhokchew/parser.py @@ -1,5 +1,5 @@ -from .models.CikLinSyllable import CikLinSyllable -from .models.FoochowRomanized import FoochowRomanizedSyllable +from .models.historical.CikLinSyllable import CikLinSyllable +from .models.historical.FoochowRomanized import FoochowRomanizedSyllable def parse_ciklin(fanqie, tone): return CikLinSyllable.from_ciklin_string(fanqie, tone) diff --git a/pyhokchew/tests/ParserTest.py b/pyhokchew/tests/ParserTest.py index 36d4784..b5ced1b 100644 --- a/pyhokchew/tests/ParserTest.py +++ b/pyhokchew/tests/ParserTest.py @@ -1,5 +1,5 @@ import unittest -from ..models.FoochowRomanized import FoochowRomanizedSyllable +from ..models.historical.FoochowRomanized import FoochowRomanizedSyllable from ..parser import parse_ciklin from ..utils import normalise, denormalise from ..convert import foochow_romanized_to_ciklin, ciklin_to_foochow_romanized From 4b025239e2c1ea4957ff15ee754422c89d881e99 Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Fri, 9 Nov 2018 08:15:08 +1100 Subject: [PATCH 5/7] hector --- .gitignore | 4 +- pyhokchew/convert.py | 13 +- pyhokchew/models/historical/HectorScheme.py | 129 ++++++++++++++++++++ pyhokchew/tests/ConversionTest.py | 16 ++- 4 files changed, 152 insertions(+), 10 deletions(-) create mode 100644 pyhokchew/models/historical/HectorScheme.py diff --git a/.gitignore b/.gitignore index ec9cdb9..7dfecc4 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,6 @@ ENV/ .idea/ -.vscode/ \ No newline at end of file +.vscode/ + +.DS_Store \ No newline at end of file diff --git a/pyhokchew/convert.py b/pyhokchew/convert.py index 5c893f6..2585624 100644 --- a/pyhokchew/convert.py +++ b/pyhokchew/convert.py @@ -1,5 +1,6 @@ from .models.historical.CikLinSyllable import CikLinSyllable from .models.historical.FoochowRomanized import FoochowRomanizedSyllable +from .models.historical.HectorScheme import HectorSyllable from .models.concurrent.matsu import WuyixingSyllable from .models.concurrent.minjiang import MinjiangSyllable from .models.yngping.YngPingTwo import YngPingSyllable @@ -21,6 +22,9 @@ def ciklin_to_foochow_romanized_string(cInitial, cFinal, cTone): """ return ciklin_to_foochow_romanized(CikLinSyllable.from_ciklin_string(cInitial+cFinal,cTone)).get_string() +def hector_to_foochow_romanized(s): + h = HectorSyllable.from_string(s) + return FoochowRomanizedSyllable(h.initial,h.final,h.tone).get_string() def wuyixing_to_yngping(s): """從烏衣行轉換爲榕拼方案 @@ -53,12 +57,6 @@ def minjiang_to_yngping(s): """ mj = MinjiangSyllable.from_string(s) - - FINAL_MAPPING = { - # TODO: 榕拼無? - 'ieu': 'iu' - } - VOWEL_MAPPING = { 'ë': 'oe', 'ü': 'y' @@ -72,4 +70,5 @@ def minjiang_to_yngping(s): return YngPingSyllable( mj.initial, final, - mj.tone).to_typing() \ No newline at end of file + mj.tone).to_typing() + diff --git a/pyhokchew/models/historical/HectorScheme.py b/pyhokchew/models/historical/HectorScheme.py new file mode 100644 index 0000000..8c234ac --- /dev/null +++ b/pyhokchew/models/historical/HectorScheme.py @@ -0,0 +1,129 @@ +from ...utils import normalise + +HT_INITIALS = ['l', 'b', 'g', 'k', 'd', + 'p', 't', 'z', 'n', 's', + '' , 'm','ng', 'c', 'h'] + +HT_FINALS = [ + # 春 花 香 秋 山 + 'ung', 'ua','iong', 'iu', 'ang', + # 開 嘉 賓 歡 歌 + 'ai' , 'a', 'ing', 'uang', 'o', + # 須 杯 孤 燈 光 + 'y' , 'uoi', 'u', 'eng', 'uong', + # 輝 燒 銀 缸 之 + 'ui' , 'ieu', 'yng', 'ong', 'i', + # 東 郊 過 西 橋 + 'oeng', 'au', 'uo', 'e', 'io', + # 鷄 聲 催 初 天 + 'ie', 'iang', 'oi', 'oe', 'ieng', + # 奇 歪 溝 + 'ia', 'uai', 'eu' + # TODO 桸𤬣 ya +] + +# 入聲 +HT_FINALS_RU = [ + # 春 花 香 秋 山 + 'uk' , 'uah', 'iok', 'iuh', 'ak', + # 開 嘉 賓 歡 歌 + 'aih', 'ah', 'ik', 'uak', 'oh', + # 須 杯 孤 燈 光 + 'yh' , 'uoih', 'uh', 'ek', 'uok', + # 輝 燒 銀 缸 之 + 'uih', 'ieuh', 'yk', 'ok', 'ih', + # 東 郊 過 西 橋 + 'oek', 'auh', 'uoh', 'eh', 'ioh', + # 鷄 聲 催 初 天 + 'ieh', 'iak', 'oih', 'oeh', 'iek', + # 奇 歪 溝 + 'iah', 'uaih', 'euh' +] + +class HectorSyllable: + """ + only3km 的擬音code. + https://github.com/only3km/ciklinbekin/blob/gh-pages/convert.tsv + """ + TONE_MAPPING_REVERSE = [1,2,3,4,5,7,8] + + def __init__(self, initial, final, tone): + if not (initial in range(len(HT_INITIALS)) and final in range(len(HT_FINALS)) \ + and tone in self.TONE_MAPPING_REVERSE): + raise ValueError("Invalid syllable arguments.") + + self.initial = initial + self.final = final + self.tone = tone + pass + + def __str__(self): + return "HectorSyllable " + self.get_string() + \ + " [Initial=%d Final=%d Tone=%d]" % (self.initial, self.final, self.tone) + + def __repr__(self): + return self.__str__() + + def get_initial(self): + """ + Gets the initial of the syllable. + 獲取該音節的聲母. + :return: + """ + return HT_INITIALS[self.initial] + + def get_final(self): + """ + Gets the final of the syllable without tonal marks. + 獲取該音節的韻母. + :return: + """ + return HT_FINALS_RU[self.final] if self.tone in [4,8] else HT_FINALS[self.final] + + def get_tone(self): + """ + Gets the tone number. + 獲取該音節的聲調. + :return: + """ + return self.tone + + def get_string(self): + return self.get_initial() + \ + self.get_final() + \ + str(self.tone) + + @classmethod + def from_string(cls, s): + """ + """ + s = normalise(s).strip().lower() + + # Try parse initial + initials_list = sorted(HT_INITIALS, key = lambda x: len(x), reverse= True) # Longer matches first + + for i in initials_list: + if s.startswith(i): + initial = HT_INITIALS.index(i) + remaining = s[len(i):] + break + + # Try tone + tone = None + toneStr = remaining[-1:] + remaining = remaining[:-1] + try: + tone = int(toneStr) + except: + raise ValueError("%s is not a valid Hector syllable: unknown tone") + + # Try parse final + if remaining in HT_FINALS: + final = HT_FINALS.index(remaining) + elif remaining in HT_FINALS_RU and tone in [4,8]: + final = HT_FINALS_RU.index(remaining) + else: + raise ValueError("%s is not a valid Hector syllable: %s not found in finals. " \ + % (s, remaining)) + + return cls(initial, final, tone) diff --git a/pyhokchew/tests/ConversionTest.py b/pyhokchew/tests/ConversionTest.py index 1df3ef3..b0afb75 100644 --- a/pyhokchew/tests/ConversionTest.py +++ b/pyhokchew/tests/ConversionTest.py @@ -1,6 +1,6 @@ import unittest from ..utils import normalise, denormalise -from ..convert import wuyixing_to_yngping, minjiang_to_yngping +from ..convert import wuyixing_to_yngping, minjiang_to_yngping, hector_to_foochow_romanized class ConversionTestCase(unittest.TestCase): def setUp(self): @@ -38,4 +38,16 @@ def test_minjiang_to_yngping(self): for mj, yngping in TESTS: with self.subTest(msg="測試閩江學院轉榕拼 %s => %s" % (mj, yngping)): - self.assertEqual(yngping, minjiang_to_yngping(mj)) \ No newline at end of file + self.assertEqual(yngping, minjiang_to_yngping(mj)) + + def test_hector_to_foochow_romanized(self): + """測試 only3km 擬音 code 轉羅馬字 + """ + + TESTS = [ + ('iok4','iók'), + ] + + for hector, fr in TESTS: + with self.subTest(msg=" %s => %s" % (hector, fr)): + self.assertEqual(fr, hector_to_foochow_romanized(hector)) \ No newline at end of file From 49c7a6a6209e4d065d95bcf71528f7042cba4497 Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Sat, 10 Nov 2018 10:42:36 +1100 Subject: [PATCH 6/7] =?UTF-8?q?=E6=AD=B7=E5=8F=B2=E9=9F=B3=E7=B3=BB?= =?UTF-8?q?=E8=BD=89=E7=8F=BE=E4=BB=A3=E9=9F=B3=E7=B3=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyhokchew/convert.py | 77 ++++- .../models/historical/FoochowRomanized.py | 50 +++- pyhokchew/models/yngping/YngPingTwo.py | 6 +- pyhokchew/tests/ConversionTest.py | 263 +++++++++++++++++- pyhokchew/utils.py | 4 + 5 files changed, 391 insertions(+), 9 deletions(-) diff --git a/pyhokchew/convert.py b/pyhokchew/convert.py index 2585624..7e2ef30 100644 --- a/pyhokchew/convert.py +++ b/pyhokchew/convert.py @@ -22,9 +22,84 @@ def ciklin_to_foochow_romanized_string(cInitial, cFinal, cTone): """ return ciklin_to_foochow_romanized(CikLinSyllable.from_ciklin_string(cInitial+cFinal,cTone)).get_string() +def foochow_romanized_to_yngping(f: FoochowRomanizedSyllable): + INITIAL_MAPPING = ['l', 'b', 'g', 'k', 'd', + 'p', 't', 'z', 'n', 's', + '' , 'm','ng','c', 'h'] + FINAL_MAPPING = [ + # 1 2 3 4 5 7 8 FR + # 55 33 212 23 53 242 5 YP + ['ung', 'ung', 'oung', 'ouk', 'ung', 'oung', 'uk'], # 春 + ['ua', 'ua', 'ua', 'uah', 'ua', 'ua', 'uah'], # 花 + ['yong', 'yong','yong', 'yok', 'yong', 'yong', 'yok'], # 香 + ['iu', 'iu', 'iu', 'iuh', 'iu', 'iu', None], # 秋 + ['ang', 'ang', 'ang', 'ak', 'ang', 'ang', 'ak'], # 山 + ['ai', 'ai', 'ai', 'aih', 'ai', 'ai', 'aih'], # 開 + ['a', 'a', 'a', 'ah', 'a', 'a', 'ah'], # 嘉 + ['ing', 'ing', 'eing', 'eik', 'ing', 'eing', 'ik'], # 賓 + ['uang', 'uang','uang', 'uak', 'uang', 'uang', 'uak'], # 歡 + ['o', 'o', 'o', 'oh', 'o', 'o', 'oh'], # 歌 + ['y', 'y', 'oey', 'oeyk','y', 'oey', 'yk'], # 須 + ['ui', 'ui', 'ui', 'uih', 'ui', 'ui', None], # 杯 + ['u', 'u', 'ou', 'ouh', 'u', 'ou', 'uh'], # 孤 + ['eing', 'eing','aing', 'aik', 'eing', 'aing', 'eik'], # 燈 + ['uong', 'uong','uong', 'uok', 'uong', 'uong', 'uok'], # 光 + ['ui', 'ui', 'ui', 'uih', 'ui', 'ui', 'uih'], # 輝 + ['iu', 'iu', 'iu', 'iuh', 'iu', 'iu', None], # 燒 + ['yng', 'yng', 'oeyng','oeyk','yng', 'oeyng','yk'], # 銀 + ['oung', 'oung','aung', 'auk', 'oung', 'aung', 'ouk'], # 缸 + ['i', 'i', 'ei', 'eih', 'i', 'ei', 'ih'], # 之 + ['oeyng','oeyng','oyng','oyk', 'oeyng','oyng', 'oeyk'],# 東 + ['au', 'au', 'au', 'auh' ,'au', 'au', 'auh'], # 郊 + ['uo', 'uo', 'uo', 'uoh', 'uo', 'uo', 'uoh'], # 過 + ['e', 'e', 'a', 'ah', 'e', 'a', 'eh'], # 西 + ['yo', 'yo', 'yo', 'yoh', 'yo', 'yo', 'yoh'], # 橋 + ['ie', 'ie', 'ie', 'ieh', 'ie', 'ie', 'ieh'], # 鷄 + ['iang', 'iang','iang', 'iak', 'iang', 'iang', 'iak'], # 聲 + ['oey', 'oey', 'oy', 'oyh', 'oey', 'oy', 'oeyh'],# 催 + ['oe', 'oe', 'o', 'oeh', 'oe', 'o', 'oeh'], # 初 + ['ieng', 'ieng','ieng', 'iek', 'ieng', 'ieng', 'iek'], # 天 + ['ia', 'ia', 'ia', 'ieh', 'ia', 'ia', 'ieh'], # 奇 + ['uai', 'uai', 'uai', 'uaih','uai', 'uai', 'uaih'],# 歪 + ['eu', 'eu', 'au', 'auh', 'eu', 'au', 'euh'] # 溝 + ] + FINAL_MAPPING_IO_UO = { + 24: ['uo', 'uo', 'uo', 'uoh', 'uo', 'uo', 'uoh'], + 2: ['uong', 'uong','uong', 'uok', 'uong', 'uong', 'uok'] + } + TONE_MAPPING = { + 1: "55", + 2: "33", + 3: "212", + 4: "23", + 5: "53", + 6: "33", + 7: "242", + 8: "5" + } + ypInitial = INITIAL_MAPPING[f.initial] + toneIndex = FoochowRomanizedSyllable.TONE_MAPPING[f.tone] + if ypInitial not in ['','g','k','h','ng'] and f.final in [2, 24]: #香 / 橋 + # 漳泉亂 + ypFinal = FINAL_MAPPING_IO_UO[f.final][toneIndex] + else: + ypFinal = FINAL_MAPPING[f.final][toneIndex] + ypTone = TONE_MAPPING[f.tone] + return YngPingSyllable(ypInitial, ypFinal, ypTone) + def hector_to_foochow_romanized(s): h = HectorSyllable.from_string(s) - return FoochowRomanizedSyllable(h.initial,h.final,h.tone).get_string() + return FoochowRomanizedSyllable(h.initial,h.final,h.tone) + +def hector_to_foochow_romanized_string(s): + return hector_to_foochow_romanized(s).get_string() + +def hector_to_yngping(s): + f = hector_to_foochow_romanized(s) + return foochow_romanized_to_yngping(f).to_typing() + +def foochow_romanized_to_yngping_string(s, allow_omit_ingbing=False): + return foochow_romanized_to_yngping(FoochowRomanizedSyllable.from_string(s,allow_omit_ingbing)).to_typing() def wuyixing_to_yngping(s): """從烏衣行轉換爲榕拼方案 diff --git a/pyhokchew/models/historical/FoochowRomanized.py b/pyhokchew/models/historical/FoochowRomanized.py index 624ceb5..737ea0f 100644 --- a/pyhokchew/models/historical/FoochowRomanized.py +++ b/pyhokchew/models/historical/FoochowRomanized.py @@ -44,6 +44,44 @@ 'eu': ['ĕu', 'ēu', 'áiu', 'áiuh','èu', 'âiu', 'ĕuh'] # 溝 } +# 無調號下入 +FR_FINALS_RU_NOACCENT = [ + 'uk', # 春 + 'uah', # 花 + 'iok', # 香 + None, # 秋 + 'ak', # 山 + 'aih', # 開 + 'ah', # 嘉 + 'ik', # 賓 + 'uak', # 歡 + 'o̤h', # 歌 + 'ṳh', # 須 + None, # 杯 + 'uh', # 孤 + 'ek', # 燈 + 'uok', # 光 + 'uih', # 輝 + None, # 燒 + 'ṳk', # 銀 + 'ok', # 缸 + 'ih', # 之 + 'e̤k', # 東 + 'auh', # 郊 + 'uoh', # 過 + 'a̤h', # 西 + 'ioh', # 橋 + 'ieh', # 鷄 + 'iak', # 聲 + 'oih', # 催 + 'e̤h', # 初 + 'iek', # 天 + 'iah', # 奇 + 'uaih',# 歪 + 'euh' # 溝 +] + + # List of all possible finals in Foochow Romanized, without tonal marks. FR_FINALS_LIST = list(FR_FINALS.keys()) @@ -129,7 +167,7 @@ def from_string(cls, s, allow_omit_ingbing = False): """ Parse a Foochow Romanized syllable from a string. :param s: A single Foochow Romanized syllable string. - :param allow_omit_ingbing: 允許上平聲調號省略。 + :param allow_omit_ingbing: 允許上平、下入聲調號省略。 See: https://cdo.wikipedia.org/wiki/%E5%B9%AB%E5%8A%A9:Ci%C5%8Fng-i%C3%B4ng_t%C4%95%CC%A4k#%E5%B0%8D%E6 %95%99%E6%9C%83%E5%B9%B3%E8%A9%B1%E5%AD%97%E6%94%B9%E9%80%B2%E7%9A%84%E6%84%8F%E8%A6%8B """ @@ -151,13 +189,17 @@ def from_string(cls, s, allow_omit_ingbing = False): tone = FoochowRomanizedSyllable.TONE_MAPPING_REVERSE[mapping[1]] else: if (allow_omit_ingbing): - # 允許上平調省略 + # 允許上平及下入調省略 try: final = FR_FINALS_LIST.index(remaining) tone = 1 except ValueError: - raise ValueError("%s is not a valid Foochow Romanized syllable: %s not found in finals. " \ - % (s, remaining)) + try: + final = FR_FINALS_RU_NOACCENT.index(remaining) + tone = 8 + except ValueError: + raise ValueError("%s is not a valid Foochow Romanized syllable: %s not found in finals. " \ + % (s, remaining)) else: raise ValueError("%s is not a valid Foochow Romanized syllable: %s not found in finals. " \ % (s, remaining)) diff --git a/pyhokchew/models/yngping/YngPingTwo.py b/pyhokchew/models/yngping/YngPingTwo.py index dca131f..0a151ca 100644 --- a/pyhokchew/models/yngping/YngPingTwo.py +++ b/pyhokchew/models/yngping/YngPingTwo.py @@ -24,11 +24,13 @@ # 入聲韻 甲類 'ah', 'ak', 'eh', 'oh', 'oeh', 'iah', 'iak', 'ieh', 'iek', 'uah', 'uak', 'uoh', 'uok', 'yoh', 'yok', # 入聲韻 丙類 - 'ik', 'eik', 'ih', 'eih', 'aik', 'uk', 'ouk', 'auk', 'yk', 'oeyk', 'oyk', + 'ik', 'eik', 'ih', 'eih', 'aik', 'uk', 'ouk', 'auk', 'yk', 'oeyk', 'oyk', 'oeyh', 'oyh', # 陽聲韻 甲類 'ang', 'iang', 'ieng', 'uang', 'uong', 'yong', # 陽聲韻 丙類 - 'ing', 'eing', 'aing', 'ung', 'oung', 'aung', 'yng', 'oeyng', 'oyng' + 'ing', 'eing', 'aing', 'ung', 'oung', 'aung', 'yng', 'oeyng', 'oyng', + # TODO: INSPECT 兼容古音系 + 'ouh', 'uh', 'auh' ] YP_TONES = [ diff --git a/pyhokchew/tests/ConversionTest.py b/pyhokchew/tests/ConversionTest.py index b0afb75..93b0b0c 100644 --- a/pyhokchew/tests/ConversionTest.py +++ b/pyhokchew/tests/ConversionTest.py @@ -1,6 +1,7 @@ import unittest from ..utils import normalise, denormalise -from ..convert import wuyixing_to_yngping, minjiang_to_yngping, hector_to_foochow_romanized +from ..convert import wuyixing_to_yngping, minjiang_to_yngping, hector_to_foochow_romanized_string, \ + hector_to_yngping, foochow_romanized_to_yngping_string class ConversionTestCase(unittest.TestCase): def setUp(self): @@ -46,8 +47,266 @@ def test_hector_to_foochow_romanized(self): TESTS = [ ('iok4','iók'), + ("uh4", "óh") ] for hector, fr in TESTS: with self.subTest(msg=" %s => %s" % (hector, fr)): - self.assertEqual(fr, hector_to_foochow_romanized(hector)) \ No newline at end of file + self.assertEqual(fr, hector_to_foochow_romanized_string(hector)) + + def test_historical_to_yngping(self): + """測試歷史音系轉換 + """ + + TESTS = [ + ("ung55", "ung1", "ung"), + ("ung53", "ung5", "ùng"), + ("ung33", "ung2", "ūng"), + ("oung212", "ung3", "óng"), + ("oung242", "ung7", "ông"), + ("ouk23", "uk4", "ók"), + ("uk5", "uk8", "uk"), + ("ua55", "ua1", "ua"), + ("ua53", "ua5", "uà"), + ("ua33", "ua2", "uā"), + ("ua212", "ua3", "uá"), + ("ua242", "ua7", "uâ"), + ("uah23", "uah4", "uáh"), + ("uah5", "uah8", "uah"), + ("yong55", "iong1", "iong"), + ("yong53", "iong5", "iòng"), + ("yong33", "iong2", "iōng"), + ("yong212", "iong3", "ióng"), + ("yong242", "iong7", "iông"), + ("yok23", "iok4", "iók"), + ("yok5", "iok8", "iok"), + ("buong55", "biong1", "biong"), + ("buong53", "biong5", "biòng"), + ("buong33", "biong2", "biōng"), + ("buong212", "biong3", "bióng"), + ("buong242", "biong7", "biông"), + ("buok23", "biok4", "biók"), + ("buok5", "biok8", "biok"), + ("iu55", "iu1", "iu"), + ("iu53", "iu5", "iù"), + ("iu33", "iu2", "iū"), + ("iu212", "iu3", "éu"), + ("iu242", "iu7", "êu"), + #("iuh23", "iuh4", "éuh"), + #("iuh5", "iuh8", "iuh"), + ("ang55", "ang1", "ang"), + ("ang53", "ang5", "àng"), + ("ang33", "ang2", "āng"), + ("ang212", "ang3", "áng"), + ("ang242", "ang7", "âng"), + ("ak23", "ak4", "ák"), + ("ak5", "ak8", "ak"), + ("ai55", "ai1", "ai"), + ("ai53", "ai5", "ài"), + ("ai33", "ai2", "āi"), + ("ai212", "ai3", "ái"), + ("ai242", "ai7", "âi"), + #("aih23", "aih4", "áih"), + #("aih5", "aih8", "aih"), + ("a55", "a1", "a"), + ("a53", "a5", "à"), + ("a33", "a2", "ā"), + ("a212", "a3", "á"), + ("a242", "a7", "â"), + ("ah23", "ah4", "áh"), + ("ah5", "ah8", "ah"), + ("ing55", "ing1", "ing"), + ("ing53", "ing5", "ìng"), + ("ing33", "ing2", "īng"), + ("eing212", "ing3", "éng"), + ("eing242", "ing7", "êng"), + ("eik23", "ik4", "ék"), + ("ik5", "ik8", "ik"), + ("uang55", "uang1", "uang"), + ("uang53", "uang5", "uàng"), + ("uang33", "uang2", "uāng"), + ("uang212", "uang3", "uáng"), + ("uang242", "uang7", "uâng"), + ("uak23", "uak4", "uák"), + ("uak5", "uak8", "uak"), + ("o55", "o1", "o̤"), + ("o53", "o5", "ò̤"), + ("o33", "o2", "ō̤"), + ("o212", "o3", "ó̤"), + ("o242", "o7", "ô̤"), + ("oh23", "oh4", "ó̤h"), + ("oh5", "oh8", "o̤h"), + ("y55", "y1", "ṳ"), + ("y53", "y5", "ṳ̀"), + ("y33", "y2", "ṳ̄"), + ("oey212", "y3", "é̤ṳ"), + ("oey242", "y7", "ê̤ṳ"), + ("oeyk23", "yk4", "é̤ṳk"), + ("yk5", "yk8", "ṳk"), + ("ui55", "uoi1", "uoi"), + ("ui53", "uoi5", "uòi"), + ("ui33", "uoi2", "uōi"), + ("ui212", "uoi3", "uói"), + ("ui242", "uoi7", "uôi"), + #("uih23", "uoih4", "uóih"), + #("uih5", "uoih8", "uoih"), + ("u55", "u1", "u"), + ("u53", "u5", "ù"), + ("u33", "u2", "ū"), + ("ou212", "u3", "ó"), + ("ou242", "u7", "ô"), + ("ouh23", "uh4", "óh"), + ("uh5", "uh8", "uh"), + ("eing55", "eng1", "eng"), + ("eing53", "eng5", "èng"), + ("eing33", "eng2", "ēng"), + ("aing212", "eng3", "áing"), + ("aing242", "eng7", "âing"), + ("aik23", "ek4", "áik"), + ("eik5", "ek8", "ek"), + ("uong55", "uong1", "uong"), + ("uong53", "uong5", "uòng"), + ("uong33", "uong2", "uōng"), + ("uong212", "uong3", "uóng"), + ("uong242", "uong7", "uông"), + ("uok23", "uok4", "uók"), + ("uok5", "uok8", "uok"), + ("ui55", "ui1", "ui"), + ("ui53", "ui5", "ùi"), + ("ui33", "ui2", "ūi"), + ("ui212", "ui3", "ói"), + ("ui242", "ui7", "ôi"), + #("uih23", "uih4", "óih"), + #("uih5", "uih8", "uih"), + ("iu55", "ieu1", "ieu"), + ("iu53", "ieu5", "ièu"), + ("iu33", "ieu2", "iēu"), + ("iu212", "ieu3", "iéu"), + ("iu242", "ieu7", "iêu"), + #("iuh23", "ieuh4", "iéuh"), + #("iuh5", "ieuh8", "ieuh"), + ("yng55", "yng1", "ṳng"), + ("yng53", "yng5", "ṳ̀ng"), + ("yng33", "yng2", "ṳ̄ng"), + ("oeyng212", "yng3", "é̤ṳng"), + ("oeyng242", "yng7", "ê̤ṳng"), + ("oeyk23", "yk4", "é̤ṳk"), + ("yk5", "yk8", "ṳk"), + ("oung55", "ong1", "ong"), + ("oung53", "ong5", "òng"), + ("oung33", "ong2", "ōng"), + ("aung212", "ong3", "áung"), + ("aung242", "ong7", "âung"), + ("auk23", "ok4", "áuk"), + ("ouk5", "ok8", "ok"), + ("i55", "i1", "i"), + ("i53", "i5", "ì"), + ("i33", "i2", "ī"), + ("ei212", "i3", "é"), + ("ei242", "i7", "ê"), + ("eih23", "ih4", "éh"), + ("ih5", "ih8", "ih"), + ("oeyng55", "oeng1", "e̤ng"), + ("oeyng53", "oeng5", "è̤ng"), + ("oeyng33", "oeng2", "ē̤ng"), + ("oyng212", "oeng3", "áe̤ng"), + ("oyng242", "oeng7", "âe̤ng"), + ("oyk23", "oek4", "áe̤k"), + ("oeyk5", "oek8", "e̤k"), + ("au55", "au1", "au"), + ("au53", "au5", "àu"), + ("au33", "au2", "āu"), + ("au212", "au3", "áu"), + ("au242", "au7", "âu"), + ("auh23", "auh4", "áuh"), + #("auh5", "auh8", "auh"), + ("uo55", "uo1", "uo"), + ("uo53", "uo5", "uò"), + ("uo33", "uo2", "uō"), + ("uo212", "uo3", "uó"), + ("uo242", "uo7", "uô"), + ("uoh23", "uoh4", "uóh"), + ("uoh5", "uoh8", "uoh"), + ("e55", "e1", "a̤"), + ("e53", "e5", "à̤"), + ("e33", "e2", "ā̤"), + ("a212", "e3", "á̤"), + ("a242", "e7", "â̤"), + ("ah23", "eh4", "á̤h"), + #("eh5", "eh8", "a̤h"), + ("yo55", "io1", "io"), + ("yo53", "io5", "iò"), + ("yo33", "io2", "iō"), + ("yo212", "io3", "ió"), + ("yo242", "io7", "iô"), + ("yoh23", "ioh4", "ióh"), + ("yoh5", "ioh8", "ioh"), + ("cuo55", "cio1", "chio"), + ("cuo53", "cio5", "chiò"), + ("cuo33", "cio2", "chiō"), + ("cuo212", "cio3", "chió"), + ("cuo242", "cio7", "chiô"), + ("cuoh23", "cioh4", "chióh"), + #("cuoh5", "cioh8", "chioh"), + ("ie55", "ie1", "ie"), + ("ie53", "ie5", "iè"), + ("ie33", "ie2", "iē"), + ("ie212", "ie3", "ié"), + ("ie242", "ie7", "iê"), + ("ieh23", "ieh4", "iéh"), + #("ieh5", "ieh8", "ieh"), + ("iang55", "iang1", "iang"), + ("iang53", "iang5", "iàng"), + ("iang33", "iang2", "iāng"), + ("iang212", "iang3", "iáng"), + ("iang242", "iang7", "iâng"), + ("iak23", "iak4", "iák"), + #("iak5", "iak8", "iak"), + ("oey55", "oi1", "oi"), + ("oey53", "oi5", "òi"), + ("oey33", "oi2", "ōi"), + ("oy212", "oi3", "ó̤i"), + ("oy242", "oi7", "ô̤i"), + ("oyh23", "oih4", "ó̤ih"), + #("oeyh5", "oih8", "oih"), + ("oe55", "oe1", "e̤"), + ("oe53", "oe5", "è̤"), + ("oe33", "oe2", "ē̤"), + ("o212", "oe3", "áe̤"), + ("o242", "oe7", "âe̤"), + #("oeh23", "oe4", "e̤h"), + #("oeh5", "oe8", "e̤h"), + ("ieng55", "ieng1", "ieng"), + ("ieng53", "ieng5", "ièng"), + ("ieng33", "ieng2", "iēng"), + ("ieng212", "ieng3", "iéng"), + ("ieng242", "ieng7", "iêng"), + ("iek23", "iek4", "iék"), + #("iek5", "iek8", "iek"), + ("ia55", "ia1", "ia"), + ("ia53", "ia5", "ià"), + ("ia33", "ia2", "iā"), + ("ia212", "ia3", "iá"), + ("ia242", "ia7", "iâ"), + ("ieh23", "iah4", "iáh"), + #("ieh5", "iah8", "iah"), + ("uai55", "uai1", "uai"), + ("uai53", "uai5", "uài"), + ("uai33", "uai2", "uāi"), + ("uai212", "uai3", "uái"), + ("uai242", "uai7", "uâi"), + #("uaih23", "uaih4", "uáih"), + #("uaih5", "uaih8", "uaih"), + ("eu55", "eu1", "eu"), + ("eu53", "eu5", "èu"), + ("eu33", "eu2", "ēu"), + ("au212", "eu3", "áiu"), + ("au242", "eu7", "âiu"), + ("auh23", "eu4", "áiuh"), + #("euh5", "eu8", "euh"), + ] + + for yp, hector, fr in TESTS: + with self.subTest(msg="測試歷史音系轉換 YP=%s HECTOR=%s FR=%s" % (yp, hector, fr)): + self.assertEqual(yp, hector_to_yngping(hector)) + self.assertEqual(yp, foochow_romanized_to_yngping_string(fr, True)) diff --git a/pyhokchew/utils.py b/pyhokchew/utils.py index 4565c8c..86f06d5 100644 --- a/pyhokchew/utils.py +++ b/pyhokchew/utils.py @@ -13,3 +13,7 @@ def normalise(s) -> str: def denormalise(s) -> str: return unicodedata.normalize('NFKD', s) + +def strip_accents(s): + return ''.join(c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn') \ No newline at end of file From 5f56fe8bd2b237124230f7bdf6115e893d731276 Mon Sep 17 00:00:00 2001 From: Radium Zheng Date: Sat, 10 Nov 2018 14:06:30 +1100 Subject: [PATCH 7/7] cleanup --- pyhokchew/tests/ConversionTest.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pyhokchew/tests/ConversionTest.py b/pyhokchew/tests/ConversionTest.py index 93b0b0c..cdf58ed 100644 --- a/pyhokchew/tests/ConversionTest.py +++ b/pyhokchew/tests/ConversionTest.py @@ -1,7 +1,8 @@ import unittest from ..utils import normalise, denormalise from ..convert import wuyixing_to_yngping, minjiang_to_yngping, hector_to_foochow_romanized_string, \ - hector_to_yngping, foochow_romanized_to_yngping_string + hector_to_yngping, foochow_romanized_to_yngping_string, hector_to_foochow_romanized +from ..models.historical.FoochowRomanized import FoochowRomanizedSyllable class ConversionTestCase(unittest.TestCase): def setUp(self): @@ -310,3 +311,10 @@ def test_historical_to_yngping(self): with self.subTest(msg="測試歷史音系轉換 YP=%s HECTOR=%s FR=%s" % (yp, hector, fr)): self.assertEqual(yp, hector_to_yngping(hector)) self.assertEqual(yp, foochow_romanized_to_yngping_string(fr, True)) + + # 擬音轉羅馬字 + f1 = hector_to_foochow_romanized(hector) + f2 = FoochowRomanizedSyllable.from_string(fr, True) + self.assertEqual(f1.initial, f2.initial) + self.assertEqual(f1.final, f2.final) + self.assertEqual(f1.tone, f2.tone)