From 5e0790c7e050957a117cae807a165aeced9c11a8 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Fri, 28 Jan 2022 22:28:31 +0900 Subject: [PATCH] To 0.10.1 (#311) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 疑似疑問文化をsynthesis APIの直前に移動 (#310) * append interrogative mora just before synthesis * parse_kana should not append interrogative mora * #310 の追加修正 * update doc Co-authored-by: Yosshi999 --- docs/api/index.html | 2 +- run.py | 5 +- test/test_kana_parser.py | 20 -- test/test_synthesis_engine_base.py | 241 +++++++++++------- voicevox_engine/cancellable_engine.py | 2 +- voicevox_engine/dev/synthesis_engine/mock.py | 2 +- voicevox_engine/kana_parser.py | 13 +- .../synthesis_engine/synthesis_engine.py | 2 +- .../synthesis_engine/synthesis_engine_base.py | 26 +- 9 files changed, 176 insertions(+), 137 deletions(-) diff --git a/docs/api/index.html b/docs/api/index.html index dc18472dd..bed0512e3 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -9,7 +9,7 @@
\ No newline at end of file diff --git a/run.py b/run.py index 00e652d54..3d1d9cd0b 100644 --- a/run.py +++ b/run.py @@ -34,9 +34,6 @@ ) from voicevox_engine.preset import Preset, PresetLoader from voicevox_engine.synthesis_engine import SynthesisEngineBase, make_synthesis_engine -from voicevox_engine.synthesis_engine.synthesis_engine_base import ( - adjust_interrogative_accent_phrases, -) from voicevox_engine.utility import ConnectBase64WavesException, connect_base64_waves @@ -196,7 +193,7 @@ def accent_phrases( accent_phrases=accent_phrases, speaker_id=speaker ) - return adjust_interrogative_accent_phrases(accent_phrases) + return accent_phrases else: return engine.create_accent_phrases( text, diff --git a/test/test_kana_parser.py b/test/test_kana_parser.py index 6c6e5aa59..bd41450b4 100644 --- a/test/test_kana_parser.py +++ b/test/test_kana_parser.py @@ -441,16 +441,6 @@ def a_question_mark_accent_phrases(): expected_accent_phrases = a_question_mark_accent_phrases() expected_accent_phrases[-1].is_interrogative = True - expected_accent_phrases[-1].moras.append( - Mora( - text="ア", - consonant=None, - consonant_length=None, - vowel="a", - vowel_length=0.0, - pitch=0.0, - ) - ) self._interrogative_accent_phrase_marks_base( text="ア'?", enable_interrogative=True, @@ -519,16 +509,6 @@ def gye_gye_gye_question_mark_accent_phrases(): expected_accent_phrases = gye_gye_gye_question_mark_accent_phrases() expected_accent_phrases[-1].is_interrogative = True - expected_accent_phrases[-1].moras.append( - Mora( - text="エ", - consonant=None, - consonant_length=None, - vowel="e", - vowel_length=0.0, - pitch=0.0, - ) - ) self._interrogative_accent_phrase_marks_base( text="ギェ'、ギェ'/ギェ'?", enable_interrogative=True, diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py index 4309e7c90..365e17395 100644 --- a/test/test_synthesis_engine_base.py +++ b/test/test_synthesis_engine_base.py @@ -4,7 +4,7 @@ import numpy -from voicevox_engine.model import AccentPhrase, Mora +from voicevox_engine.model import AccentPhrase, AudioQuery, Mora from voicevox_engine.synthesis_engine import SynthesisEngine @@ -70,6 +70,104 @@ def decode_mock( return numpy.array(result) +def koreha_arimasuka_base_expected(): + return [ + AccentPhrase( + moras=[ + Mora( + text="コ", + consonant="k", + consonant_length=2.44, + vowel="o", + vowel_length=2.88, + pitch=4.38, + ), + Mora( + text="レ", + consonant="r", + consonant_length=3.06, + vowel="e", + vowel_length=1.88, + pitch=4.0, + ), + Mora( + text="ワ", + consonant="w", + consonant_length=3.62, + vowel="a", + vowel_length=1.44, + pitch=4.19, + ), + ], + accent=3, + pause_mora=None, + is_interrogative=False, + ), + AccentPhrase( + moras=[ + Mora( + text="ア", + consonant=None, + consonant_length=None, + vowel="a", + vowel_length=1.44, + pitch=1.44, + ), + Mora( + text="リ", + consonant="r", + consonant_length=3.06, + vowel="i", + vowel_length=2.31, + pitch=4.44, + ), + Mora( + text="マ", + consonant="m", + consonant_length=2.62, + vowel="a", + vowel_length=1.44, + pitch=3.12, + ), + Mora( + text="ス", + consonant="s", + consonant_length=3.19, + vowel="U", + vowel_length=1.38, + pitch=0.0, + ), + Mora( + text="カ", + consonant="k", + consonant_length=2.44, + vowel="a", + vowel_length=1.44, + pitch=2.94, + ), + ], + accent=3, + pause_mora=None, + is_interrogative=False, + ), + ] + + +def create_mock_query(accent_phrases): + return AudioQuery( + accent_phrases=accent_phrases, + speedScale=1, + pitchScale=0, + intonationScale=1, + volumeScale=1, + prePhonemeLength=0.1, + postPhonemeLength=0.1, + outputSamplingRate=24000, + outputStereo=False, + kana="", + ) + + class TestSynthesisEngineBase(TestCase): def setUp(self): super().setUp() @@ -79,6 +177,7 @@ def setUp(self): decode_forwarder=Mock(side_effect=decode_mock), speakers="", ) + self.synthesis_engine._synthesis_impl = Mock() def create_accent_phrases_test_base( self, text: str, expected: List[AccentPhrase], enable_interrogative: bool @@ -96,89 +195,43 @@ def create_accent_phrases_test_base( + ")", ) + def create_synthesis_test_base( + self, text: str, expected: List[AccentPhrase], enable_interrogative: bool + ): + """音声合成時に疑問文モーラ処理を行っているかどうかを検証 + (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866) + """ + accent_phrases = self.synthesis_engine.create_accent_phrases( + text, 1, enable_interrogative + ) + query = create_mock_query(accent_phrases=accent_phrases) + self.synthesis_engine.synthesis(query, 0) + # _synthesis_implの第一引数に与えられたqueryを検証 + actual = self.synthesis_engine._synthesis_impl.call_args[0][0].accent_phrases + + self.assertEqual( + expected, + actual, + "case(text:" + + text + + ",enable_interrogative:" + + str(enable_interrogative) + + ")", + ) + def test_create_accent_phrases(self): - def koreha_arimasuka_base_expected(): - return [ - AccentPhrase( - moras=[ - Mora( - text="コ", - consonant="k", - consonant_length=2.44, - vowel="o", - vowel_length=2.88, - pitch=4.38, - ), - Mora( - text="レ", - consonant="r", - consonant_length=3.06, - vowel="e", - vowel_length=1.88, - pitch=4.0, - ), - Mora( - text="ワ", - consonant="w", - consonant_length=3.62, - vowel="a", - vowel_length=1.44, - pitch=4.19, - ), - ], - accent=3, - pause_mora=None, - is_interrogative=False, - ), - AccentPhrase( - moras=[ - Mora( - text="ア", - consonant=None, - consonant_length=None, - vowel="a", - vowel_length=1.44, - pitch=1.44, - ), - Mora( - text="リ", - consonant="r", - consonant_length=3.06, - vowel="i", - vowel_length=2.31, - pitch=4.44, - ), - Mora( - text="マ", - consonant="m", - consonant_length=2.62, - vowel="a", - vowel_length=1.44, - pitch=3.12, - ), - Mora( - text="ス", - consonant="s", - consonant_length=3.19, - vowel="U", - vowel_length=1.38, - pitch=0.0, - ), - Mora( - text="カ", - consonant="k", - consonant_length=2.44, - vowel="a", - vowel_length=1.44, - pitch=2.94, - ), - ], - accent=3, - pause_mora=None, - is_interrogative=False, - ), - ] + """accent_phrasesの作成時では疑問文モーラ処理を行わない + (https://github.com/VOICEVOX/voicevox_engine/issues/272#issuecomment-1022610866) + """ + expected = koreha_arimasuka_base_expected() + expected[-1].is_interrogative = True + self.create_accent_phrases_test_base( + text="これはありますか?", + expected=expected, + enable_interrogative=True, + ) + def test_synthesis_interrogative(self): expected = koreha_arimasuka_base_expected() expected[-1].is_interrogative = True expected[-1].moras += [ @@ -191,21 +244,21 @@ def koreha_arimasuka_base_expected(): pitch=expected[-1].moras[-1].pitch + 0.3, ) ] - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="これはありますか?", expected=expected, enable_interrogative=True, ) expected = koreha_arimasuka_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="これはありますか?", expected=expected, enable_interrogative=False, ) expected = koreha_arimasuka_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="これはありますか", expected=expected, enable_interrogative=True, @@ -231,7 +284,7 @@ def nn_base_expected(): ] expected = nn_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="ん", expected=expected, enable_interrogative=True, @@ -249,14 +302,14 @@ def nn_base_expected(): pitch=expected[-1].moras[-1].pitch + 0.3, ) ] - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="ん?", expected=expected, enable_interrogative=True, ) expected = nn_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="ん?", expected=expected, enable_interrogative=False, @@ -282,7 +335,7 @@ def ltu_base_expected(): ] expected = ltu_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="っ", expected=expected, enable_interrogative=True, @@ -290,14 +343,14 @@ def ltu_base_expected(): expected = ltu_base_expected() expected[-1].is_interrogative = True - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="っ?", expected=expected, enable_interrogative=True, ) expected = ltu_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="っ?", expected=expected, enable_interrogative=False, @@ -323,7 +376,7 @@ def su_base_expected(): ] expected = su_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="す", expected=expected, enable_interrogative=True, @@ -341,14 +394,14 @@ def su_base_expected(): pitch=expected[-1].moras[-1].pitch + 0.3, ) ] - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="す?", expected=expected, enable_interrogative=True, ) expected = su_base_expected() - self.create_accent_phrases_test_base( + self.create_synthesis_test_base( text="す?", expected=expected, enable_interrogative=False, diff --git a/voicevox_engine/cancellable_engine.py b/voicevox_engine/cancellable_engine.py index cb060f076..0234c335a 100644 --- a/voicevox_engine/cancellable_engine.py +++ b/voicevox_engine/cancellable_engine.py @@ -116,7 +116,7 @@ def finalize_con( # プロセスが死んでいるので新しく作り直す self.procs_and_cons.put(self.start_new_proc()) - def synthesis( + def _synthesis_impl( self, query: AudioQuery, speaker_id: Speaker, request: Request ) -> str: """ diff --git a/voicevox_engine/dev/synthesis_engine/mock.py b/voicevox_engine/dev/synthesis_engine/mock.py index 1da12de3d..cd265c6e3 100644 --- a/voicevox_engine/dev/synthesis_engine/mock.py +++ b/voicevox_engine/dev/synthesis_engine/mock.py @@ -64,7 +64,7 @@ def replace_mora_pitch( """ return accent_phrases - def synthesis(self, query: AudioQuery, speaker_id: int) -> np.ndarray: + def _synthesis_impl(self, query: AudioQuery, speaker_id: int) -> np.ndarray: """ synthesis voicevox coreを使わずに、音声合成する [Mock] diff --git a/voicevox_engine/kana_parser.py b/voicevox_engine/kana_parser.py index dc842fe0e..1b3067908 100644 --- a/voicevox_engine/kana_parser.py +++ b/voicevox_engine/kana_parser.py @@ -1,7 +1,7 @@ from typing import List, Optional from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode -from .mora_list import openjtalk_mora2text, openjtalk_text2mora +from .mora_list import openjtalk_text2mora LOOP_LIMIT = 300 UNVOICE_SYMBOL = "_" @@ -114,17 +114,6 @@ def parse_kana(text: str, enable_interrogative: bool) -> List[AccentPhrase]: if enable_interrogative and is_interrogative_text: last_parsed_result = parsed_results[-1] - last_mora = last_parsed_result.moras[-1] - last_parsed_result.moras.append( - Mora( - text=openjtalk_mora2text[last_mora.vowel], - consonant=None, - consonant_length=None, - vowel=last_mora.vowel, - vowel_length=last_mora.vowel_length, - pitch=0, - ) - ) last_parsed_result.is_interrogative = True return parsed_results diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py index 8e302464a..e5fa0ae03 100644 --- a/voicevox_engine/synthesis_engine/synthesis_engine.py +++ b/voicevox_engine/synthesis_engine/synthesis_engine.py @@ -354,7 +354,7 @@ def _create_one_hot(accent_phrase: AccentPhrase, position: int): return accent_phrases - def synthesis(self, query: AudioQuery, speaker_id: int): + def _synthesis_impl(self, query: AudioQuery, speaker_id: int): """ 音声合成クエリから音声合成に必要な情報を構成し、実際に音声合成を行う Parameters diff --git a/voicevox_engine/synthesis_engine/synthesis_engine_base.py b/voicevox_engine/synthesis_engine/synthesis_engine_base.py index 34cb2b5e4..272550b78 100644 --- a/voicevox_engine/synthesis_engine/synthesis_engine_base.py +++ b/voicevox_engine/synthesis_engine/synthesis_engine_base.py @@ -23,7 +23,7 @@ def adjust_interrogative_accent_phrases( ) -> List[AccentPhrase]: """ enable_interrogativeが有効になっていて与えられたaccent_phrasesに疑問系のものがあった場合、 - SynthesisEngineの実装によって調整されたあとの各accent_phraseの末尾にある疑問系発音用のMoraに対して直前のMoraより少し音を高くすることで疑問文ぽくする + 各accent_phraseの末尾にある疑問系発音用のMoraに対して直前のMoraより少し音を高くすることで疑問文ぽくする NOTE: リファクタリング時に適切な場所へ移動させること """ return [ @@ -169,10 +169,30 @@ def create_accent_phrases( ], speaker_id=speaker_id, ) - return adjust_interrogative_accent_phrases(accent_phrases) + return accent_phrases - @abstractmethod def synthesis(self, query: AudioQuery, speaker_id: int): + """ + 音声合成クエリ内の疑問文指定されたMoraを変形した後、 + 継承先における実装`_synthesis_impl`を使い音声合成を行う + Parameters + ---------- + query : AudioQuery + 音声合成クエリ + speaker_id : int + 話者ID + Returns + ------- + wave : numpy.ndarray + 音声合成結果 + """ + # モーフィング時などに同一参照のqueryで複数回呼ばれる可能性があるので、元の引数のqueryに破壊的変更を行わない + query = copy.deepcopy(query) + query.accent_phrases = adjust_interrogative_accent_phrases(query.accent_phrases) + return self._synthesis_impl(query, speaker_id) + + @abstractmethod + def _synthesis_impl(self, query: AudioQuery, speaker_id: int): """ 音声合成クエリから音声合成に必要な情報を構成し、実際に音声合成を行う Parameters