Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TextAnalyzer traitにstring->AccentPhraseModel[]を移動 #740

Merged
merged 8 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/voicevox_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mod voice_model;

pub mod __internal;
pub mod blocking;
pub mod text_analyzer;
pub mod tokio;

#[cfg(test)]
Expand Down
97 changes: 9 additions & 88 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub(crate) mod blocking {
use enum_map::enum_map;

use crate::{
engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance},
engine::{create_kana, MoraModel, OjtPhoneme},
error::ErrorRepr,
infer::{
domain::{
Expand All @@ -92,6 +92,7 @@ pub(crate) mod blocking {
InferenceSessionOptions,
},
numerics::F32Ext as _,
text_analyzer::{mora_to_text, KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer},
AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId,
SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta,
};
Expand All @@ -103,7 +104,8 @@ pub(crate) mod blocking {
/// 音声シンセサイザ。
pub struct Synthesizer<O> {
pub(super) status: Status<InferenceRuntimeImpl, InferenceDomainImpl>,
open_jtalk: O,
open_jtalk_analyzer: OpenJTalkAnalyzer<O>,
kana_analyzer: KanaAnalyzer,
use_gpu: bool,
}

Expand Down Expand Up @@ -176,7 +178,8 @@ pub(crate) mod blocking {

return Ok(Self {
status,
open_jtalk,
open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk),
kana_analyzer: KanaAnalyzer,
use_gpu,
});

Expand Down Expand Up @@ -457,7 +460,8 @@ pub(crate) mod blocking {
kana: &str,
style_id: StyleId,
) -> Result<Vec<AccentPhraseModel>> {
self.replace_mora_data(&parse_kana(kana)?, style_id)
let accent_phrases = self.kana_analyzer.analyze(kana)?;
self.replace_mora_data(&accent_phrases, style_id)
}

/// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。
Expand Down Expand Up @@ -743,75 +747,7 @@ pub(crate) mod blocking {
text: &str,
style_id: StyleId,
) -> Result<Vec<AccentPhraseModel>> {
if text.is_empty() {
return Ok(Vec::new());
}

let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?;

let accent_phrases: Vec<AccentPhraseModel> = utterance
.breath_groups()
.iter()
.enumerate()
.fold(Vec::new(), |mut accum_vec, (i, breath_group)| {
accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map(
|(j, accent_phrase)| {
let moras = accent_phrase
.moras()
.iter()
.map(|mora| {
let mora_text = mora
.phonemes()
.iter()
.map(|phoneme| phoneme.phoneme().to_string())
.collect::<Vec<_>>()
.join("");

let (consonant, consonant_length) =
if let Some(consonant) = mora.consonant() {
(Some(consonant.phoneme().to_string()), Some(0.))
} else {
(None, None)
};

MoraModel::new(
mora_to_text(mora_text),
consonant,
consonant_length,
mora.vowel().phoneme().into(),
0.,
0.,
)
})
.collect();

let pause_mora = if i != utterance.breath_groups().len() - 1
&& j == breath_group.accent_phrases().len() - 1
{
Some(MoraModel::new(
"、".into(),
None,
None,
"pau".into(),
0.,
0.,
))
} else {
None
};

AccentPhraseModel::new(
moras,
*accent_phrase.accent(),
pause_mora,
*accent_phrase.is_interrogative(),
)
},
));

accum_vec
});

let accent_phrases = self.open_jtalk_analyzer.analyze(text)?;
self.replace_mora_data(&accent_phrases, style_id)
}

Expand Down Expand Up @@ -1175,21 +1111,6 @@ pub(crate) mod blocking {
(consonant_phoneme_list, vowel_phoneme_list, vowel_indexes)
}

/// Converts a mora's phoneme string into its text form via `engine::mora2text`.
///
/// When `mora` ends with one of the uppercase letters `A`, `I`, `U`, `E` or `O`,
/// that final letter is lowercased before the lookup (presumably these mark
/// unvoiced vowels in the phoneme notation; confirm against the engine).
///
/// An empty `mora` is passed to the lookup unchanged instead of panicking.
fn mora_to_text(mora: impl AsRef<str>) -> String {
    let mora = mora.as_ref();
    let mora = match mora.chars().last() {
        Some(last_char) if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) => {
            // Cut exactly the bytes of the final char so the slice stays on a char boundary.
            let stem = &mora[..mora.len() - last_char.len_utf8()];
            format!("{}{}", stem, last_char.to_ascii_lowercase())
        }
        // Empty input, or any other ending, is looked up as-is.
        _ => mora.to_string(),
    };
    // If it cannot be converted to katakana, the string passed in is returned as-is.
    engine::mora2text(&mora).to_string()
}

impl AudioQueryModel {
fn from_accent_phrases(accent_phrases: Vec<AccentPhraseModel>) -> Self {
let kana = create_kana(&accent_phrases);
Expand Down
122 changes: 122 additions & 0 deletions crates/voicevox_core/src/text_analyzer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use crate::{
engine::{self, parse_kana, MoraModel, Utterance},
AccentPhraseModel, FullcontextExtractor, Result,
};

/// Converts an input string into a list of [`AccentPhraseModel`]s.
pub trait TextAnalyzer {
/// Analyzes `text` and returns the resulting accent phrases, or an error if
/// the analysis fails.
fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>>;
}

/// A [`TextAnalyzer`] that generates an array of AccentPhrase from AquesTalk-style notation.
#[derive(Clone)]
pub struct KanaAnalyzer;

impl TextAnalyzer for KanaAnalyzer {
    /// Parses `text` with `parse_kana` and returns the resulting accent phrases.
    ///
    /// Empty input yields an empty list without invoking the parser; a parse
    /// failure is propagated as the crate error.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
        let accent_phrases = if text.is_empty() {
            Vec::new()
        } else {
            parse_kana(text)?
        };
        Ok(accent_phrases)
    }
}

/// A [`TextAnalyzer`] that generates an array of AccentPhrase using OpenJTalk.
#[derive(Clone)]
pub struct OpenJTalkAnalyzer<O>(O);

impl<O> OpenJTalkAnalyzer<O> {
    /// Wraps `open_jtalk` so it can be used as a text analyzer.
    pub fn new(open_jtalk: O) -> Self {
        OpenJTalkAnalyzer(open_jtalk)
    }
}

impl<O> TextAnalyzer for OpenJTalkAnalyzer<O>
where
    O: FullcontextExtractor,
{
    /// Extracts the full-context label of `text` through the wrapped extractor
    /// and converts the resulting utterance into accent phrases.
    ///
    /// Empty input yields an empty list without touching the extractor; an
    /// extraction failure is propagated as the crate error.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
        let accent_phrases = if text.is_empty() {
            Vec::new()
        } else {
            let utterance = Utterance::extract_full_context_label(&self.0, text)?;
            utterance_to_accent_phrases(utterance)
        };
        Ok(accent_phrases)
    }
}

/// Flattens every accent phrase of every breath group in `utterance` into one
/// list of [`AccentPhraseModel`]s, preserving their order.
///
/// Each mora's text is the concatenation of its phoneme strings passed through
/// [`mora_to_text`]. All numeric arguments of `MoraModel::new` are filled with
/// `0.`; a consonant length is only supplied when the mora has a consonant.
///
/// The last accent phrase of every breath group except the final one receives
/// a pause mora (`"、"` / `"pau"`).
fn utterance_to_accent_phrases(utterance: Utterance) -> Vec<AccentPhraseModel> {
    let breath_groups = utterance.breath_groups();
    let group_count = breath_groups.len();

    breath_groups
        .iter()
        .enumerate()
        .flat_map(|(group_index, breath_group)| {
            let phrases = breath_group.accent_phrases();
            let phrase_count = phrases.len();
            // Pauses only separate breath groups, so the final group never gets one.
            let is_final_group = group_index + 1 == group_count;

            phrases
                .iter()
                .enumerate()
                .map(move |(phrase_index, accent_phrase)| {
                    let moras = accent_phrase
                        .moras()
                        .iter()
                        .map(|mora| {
                            let mora_text: String = mora
                                .phonemes()
                                .iter()
                                .map(|phoneme| phoneme.phoneme().to_string())
                                .collect();

                            let (consonant, consonant_length) = match mora.consonant() {
                                Some(consonant) => {
                                    (Some(consonant.phoneme().to_string()), Some(0.))
                                }
                                None => (None, None),
                            };

                            MoraModel::new(
                                mora_to_text(mora_text),
                                consonant,
                                consonant_length,
                                mora.vowel().phoneme().into(),
                                0.,
                                0.,
                            )
                        })
                        .collect();

                    // Only the closing phrase of a non-final breath group is followed by a pause.
                    let is_last_phrase = phrase_index + 1 == phrase_count;
                    let pause_mora = (!is_final_group && is_last_phrase)
                        .then(|| MoraModel::new("、".into(), None, None, "pau".into(), 0., 0.));

                    AccentPhraseModel::new(
                        moras,
                        *accent_phrase.accent(),
                        pause_mora,
                        *accent_phrase.is_interrogative(),
                    )
                })
        })
        .collect()
}

/// Converts a mora's phoneme string into its text form via `engine::mora2text`.
///
/// When `mora` ends with one of the uppercase letters `A`, `I`, `U`, `E` or `O`,
/// that final letter is lowercased before the lookup (presumably these mark
/// unvoiced vowels in the phoneme notation; confirm against the engine).
///
/// An empty `mora` is passed to the lookup unchanged instead of panicking.
pub fn mora_to_text(mora: impl AsRef<str>) -> String {
    let mora = mora.as_ref();
    let mora = match mora.chars().last() {
        Some(last_char) if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) => {
            // Cut exactly the bytes of the final char so the slice stays on a char boundary.
            let stem = &mora[..mora.len() - last_char.len_utf8()];
            format!("{}{}", stem, last_char.to_ascii_lowercase())
        }
        // Empty input, or any other ending, is looked up as-is.
        _ => mora.to_string(),
    };
    // If it cannot be converted to katakana, the string passed in is returned as-is.
    engine::mora2text(&mora).to_string()
}
Loading