VOICEVOX · Hiroshiba · Feb 17, 2024 · Feb 1, 2024 · Feb 1, 2024 · Feb 1, 2024
diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs
@@ -18,6 +18,7 @@ mod voice_model;
 
 pub mod __internal;
 pub mod blocking;
+pub mod text_analyzer;
 pub mod tokio;
 
 #[cfg(test)]

diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
@@ -80,7 +80,7 @@ pub(crate) mod blocking {
     use enum_map::enum_map;
 
     use crate::{
-        engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance},
+        engine::{self, create_kana, MoraModel, OjtPhoneme},
         error::ErrorRepr,
         infer::{
             domain::{
@@ -92,6 +92,7 @@ pub(crate) mod blocking {
             InferenceSessionOptions,
         },
         numerics::F32Ext as _,
+        text_analyzer::{KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer},
         AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId,
         SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta,
     };
@@ -103,7 +104,8 @@ pub(crate) mod blocking {
     /// 音声シンセサイザ。
     pub struct Synthesizer<O> {
         pub(super) status: Status<InferenceRuntimeImpl, InferenceDomainImpl>,
-        open_jtalk: O,
+        open_jtalk_analyzer: OpenJTalkAnalyzer<O>,
+        kana_analyzer: KanaAnalyzer,
         use_gpu: bool,
     }
 
@@ -176,7 +178,8 @@ pub(crate) mod blocking {
 
             return Ok(Self {
                 status,
-                open_jtalk,
+                open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk),
+                kana_analyzer: KanaAnalyzer::new(),
                 use_gpu,
             });
 
@@ -457,7 +460,8 @@ pub(crate) mod blocking {
             kana: &str,
             style_id: StyleId,
         ) -> Result<Vec<AccentPhraseModel>> {
-            self.replace_mora_data(&parse_kana(kana)?, style_id)
+            let accent_phrases = self.kana_analyzer.analyze(kana)?;
+            self.replace_mora_data(&accent_phrases, style_id)
         }
 
         /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。
@@ -743,75 +747,7 @@ pub(crate) mod blocking {
             text: &str,
             style_id: StyleId,
         ) -> Result<Vec<AccentPhraseModel>> {
-            if text.is_empty() {
-                return Ok(Vec::new());
-            }
-
-            let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?;
-
-            let accent_phrases: Vec<AccentPhraseModel> = utterance
-                .breath_groups()
-                .iter()
-                .enumerate()
-                .fold(Vec::new(), |mut accum_vec, (i, breath_group)| {
-                    accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map(
-                        |(j, accent_phrase)| {
-                            let moras = accent_phrase
-                                .moras()
-                                .iter()
-                                .map(|mora| {
-                                    let mora_text = mora
-                                        .phonemes()
-                                        .iter()
-                                        .map(|phoneme| phoneme.phoneme().to_string())
-                                        .collect::<Vec<_>>()
-                                        .join("");
-
-                                    let (consonant, consonant_length) =
-                                        if let Some(consonant) = mora.consonant() {
-                                            (Some(consonant.phoneme().to_string()), Some(0.))
-                                        } else {
-                                            (None, None)
-                                        };
-
-                                    MoraModel::new(
-                                        mora_to_text(mora_text),
-                                        consonant,
-                                        consonant_length,
-                                        mora.vowel().phoneme().into(),
-                                        0.,
-                                        0.,
-                                    )
-                                })
-                                .collect();
-
-                            let pause_mora = if i != utterance.breath_groups().len() - 1
-                                && j == breath_group.accent_phrases().len() - 1
-                            {
-                                Some(MoraModel::new(
-                                    "、".into(),
-                                    None,
-                                    None,
-                                    "pau".into(),
-                                    0.,
-                                    0.,
-                                ))
-                            } else {
-                                None
-                            };
-
-                            AccentPhraseModel::new(
-                                moras,
-                                *accent_phrase.accent(),
-                                pause_mora,
-                                *accent_phrase.is_interrogative(),
-                            )
-                        },
-                    ));
-
-                    accum_vec
-                });
-
+            let accent_phrases = self.open_jtalk_analyzer.analyze(text)?;
             self.replace_mora_data(&accent_phrases, style_id)
         }
 

diff --git a/crates/voicevox_core/src/text_analyzer.rs b/crates/voicevox_core/src/text_analyzer.rs
@@ -0,0 +1,133 @@
+//! Text analyzers that convert input text into a list of [`AccentPhraseModel`]s.
+
+use crate::{
+    engine::{self, parse_kana, MoraModel, Utterance},
+    AccentPhraseModel, FullcontextExtractor, Result,
+};
+
+/// Converts text into accent phrases.
+pub trait TextAnalyzer {
+    /// Analyzes `text` and returns the resulting accent phrases.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the implementation fails to analyze `text`.
+    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>>;
+}
+
+/// A [`TextAnalyzer`] that generates accent phrases from AquesTalk-like notation.
+#[derive(Clone, Debug, Default)]
+pub struct KanaAnalyzer;
+
+impl KanaAnalyzer {
+    /// Creates a new `KanaAnalyzer`.
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl TextAnalyzer for KanaAnalyzer {
+    /// Parses `text` as AquesTalk-like notation; empty input yields no accent phrases.
+    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
+        if text.is_empty() {
+            return Ok(Vec::new());
+        }
+        Ok(parse_kana(text)?)
+    }
+}
+
+/// A [`TextAnalyzer`] that generates accent phrases using Open JTalk.
+#[derive(Clone, Debug)]
+pub struct OpenJTalkAnalyzer<O>(O);
+
+impl<O> OpenJTalkAnalyzer<O> {
+    /// Wraps `open_jtalk`, which is used to extract full-context labels.
+    pub fn new(open_jtalk: O) -> Self {
+        Self(open_jtalk)
+    }
+}
+
+impl<O: FullcontextExtractor> TextAnalyzer for OpenJTalkAnalyzer<O> {
+    /// Extracts the full-context label of `text` and converts it into accent phrases.
+    ///
+    /// Empty input yields no accent phrases without invoking the extractor.
+    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
+        if text.is_empty() {
+            return Ok(Vec::new());
+        }
+        let utterance = Utterance::extract_full_context_label(&self.0, text)?;
+        Ok(utterance_to_accent_phrases(&utterance))
+    }
+}
+
+/// Flattens the breath groups of `utterance` into one list of accent phrases.
+///
+/// All numeric mora fields are initialized to zero.
+fn utterance_to_accent_phrases(utterance: &Utterance) -> Vec<AccentPhraseModel> {
+    let breath_groups = utterance.breath_groups();
+    let mut accent_phrase_models = Vec::new();
+
+    for (i, breath_group) in breath_groups.iter().enumerate() {
+        let is_last_breath_group = i + 1 == breath_groups.len();
+        let accent_phrases = breath_group.accent_phrases();
+
+        for (j, accent_phrase) in accent_phrases.iter().enumerate() {
+            let moras = accent_phrase
+                .moras()
+                .iter()
+                .map(|mora| {
+                    let mora_text = mora
+                        .phonemes()
+                        .iter()
+                        .map(|phoneme| phoneme.phoneme().to_string())
+                        .collect::<String>();
+
+                    let (consonant, consonant_length) = match mora.consonant() {
+                        Some(consonant) => (Some(consonant.phoneme().to_string()), Some(0.)),
+                        None => (None, None),
+                    };
+
+                    MoraModel::new(
+                        mora_to_text(mora_text),
+                        consonant,
+                        consonant_length,
+                        mora.vowel().phoneme().into(),
+                        0.,
+                        0.,
+                    )
+                })
+                .collect();
+
+            // Every breath group but the final one ends with a pause mora.
+            let is_last_accent_phrase = j + 1 == accent_phrases.len();
+            let pause_mora = (!is_last_breath_group && is_last_accent_phrase)
+                .then(|| MoraModel::new("、".into(), None, None, "pau".into(), 0., 0.));
+
+            accent_phrase_models.push(AccentPhraseModel::new(
+                moras,
+                *accent_phrase.accent(),
+                pause_mora,
+                *accent_phrase.is_interrogative(),
+            ));
+        }
+    }
+
+    accent_phrase_models
+}
+
+/// Converts the phoneme string of one mora into its text form.
+///
+/// A trailing uppercase vowel (`A`, `I`, `U`, `E`, `O`) is lowercased before the lookup.
+/// An empty string is passed to the lookup unchanged instead of panicking.
+fn mora_to_text(mora: impl AsRef<str>) -> String {
+    let mora = mora.as_ref();
+    let normalized = match mora.chars().last() {
+        Some(last_char @ ('A' | 'I' | 'U' | 'E' | 'O')) => {
+            // The matched vowel is ASCII (one byte), so this slice ends on a char boundary.
+            format!("{}{}", &mora[..mora.len() - 1], last_char.to_ascii_lowercase())
+        }
+        _ => mora.to_string(),
+    };
+    // If conversion to katakana fails, `mora2text` returns the given string as-is.
+    engine::mora2text(&normalized).to_string()
+}