Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TextAnalyzer traitにstring->AccentPhraseModel[]を移動 #740

Merged
merged 8 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/voicevox_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mod voice_model;

pub mod __internal;
pub mod blocking;
pub mod text_analyzer;
pub mod tokio;

#[cfg(test)]
Expand Down
82 changes: 9 additions & 73 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub(crate) mod blocking {
use enum_map::enum_map;

use crate::{
engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance},
engine::{self, create_kana, MoraModel, OjtPhoneme},
error::ErrorRepr,
infer::{
domain::{
Expand All @@ -92,6 +92,7 @@ pub(crate) mod blocking {
InferenceSessionOptions,
},
numerics::F32Ext as _,
text_analyzer::{KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer},
AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId,
SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta,
};
Expand All @@ -103,7 +104,8 @@ pub(crate) mod blocking {
/// 音声シンセサイザ。
pub struct Synthesizer<O> {
pub(super) status: Status<InferenceRuntimeImpl, InferenceDomainImpl>,
open_jtalk: O,
open_jtalk_analyzer: OpenJTalkAnalyzer<O>,
kana_analyzer: KanaAnalyzer,
use_gpu: bool,
}

Expand Down Expand Up @@ -176,7 +178,8 @@ pub(crate) mod blocking {

return Ok(Self {
status,
open_jtalk,
open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk),
kana_analyzer: KanaAnalyzer::new(),
use_gpu,
});

Expand Down Expand Up @@ -457,7 +460,8 @@ pub(crate) mod blocking {
kana: &str,
style_id: StyleId,
) -> Result<Vec<AccentPhraseModel>> {
self.replace_mora_data(&parse_kana(kana)?, style_id)
let accent_phrases = self.kana_analyzer.analyze(kana)?;
self.replace_mora_data(&accent_phrases, style_id)
}

/// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。
Expand Down Expand Up @@ -743,75 +747,7 @@ pub(crate) mod blocking {
text: &str,
style_id: StyleId,
) -> Result<Vec<AccentPhraseModel>> {
if text.is_empty() {
return Ok(Vec::new());
}

let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?;

let accent_phrases: Vec<AccentPhraseModel> = utterance
.breath_groups()
.iter()
.enumerate()
.fold(Vec::new(), |mut accum_vec, (i, breath_group)| {
accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map(
|(j, accent_phrase)| {
let moras = accent_phrase
.moras()
.iter()
.map(|mora| {
let mora_text = mora
.phonemes()
.iter()
.map(|phoneme| phoneme.phoneme().to_string())
.collect::<Vec<_>>()
.join("");

let (consonant, consonant_length) =
if let Some(consonant) = mora.consonant() {
(Some(consonant.phoneme().to_string()), Some(0.))
} else {
(None, None)
};

MoraModel::new(
mora_to_text(mora_text),
consonant,
consonant_length,
mora.vowel().phoneme().into(),
0.,
0.,
)
})
.collect();

let pause_mora = if i != utterance.breath_groups().len() - 1
&& j == breath_group.accent_phrases().len() - 1
{
Some(MoraModel::new(
"、".into(),
None,
None,
"pau".into(),
0.,
0.,
))
} else {
None
};

AccentPhraseModel::new(
moras,
*accent_phrase.accent(),
pause_mora,
*accent_phrase.is_interrogative(),
)
},
));

accum_vec
});

let accent_phrases = self.open_jtalk_analyzer.analyze(text)?;
self.replace_mora_data(&accent_phrases, style_id)
}

Expand Down
128 changes: 128 additions & 0 deletions crates/voicevox_core/src/text_analyzer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
use crate::{
engine::{self, parse_kana, MoraModel, Utterance},
AccentPhraseModel, FullcontextExtractor, Result,
};

/// Common interface for components that turn a piece of text into accent phrases.
pub trait TextAnalyzer {
/// Analyzes `text` and returns the resulting list of [`AccentPhraseModel`]s,
/// or an error if the analysis fails.
fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>>;
}

/// A `TextAnalyzer` that generates an array of accent phrases from AquesTalk-style notation.
///
/// The type carries no state, so [`KanaAnalyzer::new`] and [`Default::default`] are
/// interchangeable.
#[derive(Clone, Default)]
pub struct KanaAnalyzer;

impl KanaAnalyzer {
    /// Creates a new analyzer.
    pub fn new() -> Self {
        Self
    }
}
eyr1n marked this conversation as resolved.
Show resolved Hide resolved

impl TextAnalyzer for KanaAnalyzer {
    /// Parses `text` with `parse_kana` and returns the resulting accent phrases.
    ///
    /// An empty `text` yields an empty list without invoking the parser; a parse
    /// failure is propagated as an error.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
        let accent_phrases = if text.is_empty() {
            Vec::new()
        } else {
            parse_kana(text)?
        };
        Ok(accent_phrases)
    }
}

/// A `TextAnalyzer` that generates an array of accent phrases by way of OpenJTalk.
///
/// The wrapped value `O` is the OpenJTalk handle used for the analysis.
#[derive(Clone)]
pub struct OpenJTalkAnalyzer<O>(O);

impl<O> OpenJTalkAnalyzer<O> {
    /// Wraps `open_jtalk` in a new analyzer.
    pub fn new(open_jtalk: O) -> Self {
        OpenJTalkAnalyzer(open_jtalk)
    }
}

impl<O: FullcontextExtractor> TextAnalyzer for OpenJTalkAnalyzer<O> {
    /// Extracts the full-context labels of `text` through the wrapped extractor and
    /// converts the resulting utterance into accent phrases.
    ///
    /// An empty `text` yields an empty list without touching the extractor; an
    /// extraction failure is propagated as an error.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
        let accent_phrases = if text.is_empty() {
            Vec::new()
        } else {
            let utterance = Utterance::extract_full_context_label(&self.0, text)?;
            utterance_to_accent_phrases(utterance)
        };
        Ok(accent_phrases)
    }
}

/// Flattens every breath group of `utterance` into one list of accent phrases.
///
/// Each mora's text is built by concatenating its phonemes and passing the result
/// through `mora_to_text`. The numeric arguments of `MoraModel::new` are all passed
/// as `0.` here. The last accent phrase of every breath group except the final one
/// receives a `"、"` / `pau` pause mora.
fn utterance_to_accent_phrases(utterance: Utterance) -> Vec<AccentPhraseModel> {
    let breath_groups = utterance.breath_groups();
    let mut collected = Vec::new();

    for (group_index, breath_group) in breath_groups.iter().enumerate() {
        // A pause is only inserted between breath groups, never after the last one.
        let followed_by_group = group_index + 1 != breath_groups.len();
        let phrases = breath_group.accent_phrases();

        for (phrase_index, accent_phrase) in phrases.iter().enumerate() {
            let moras = accent_phrase
                .moras()
                .iter()
                .map(|mora| {
                    let phoneme_text: String = mora
                        .phonemes()
                        .iter()
                        .map(|phoneme| phoneme.phoneme().to_string())
                        .collect();

                    // A consonant, when present, comes with a zero length.
                    let (consonant, consonant_length) = match mora.consonant() {
                        Some(consonant) => (Some(consonant.phoneme().to_string()), Some(0.)),
                        None => (None, None),
                    };

                    MoraModel::new(
                        mora_to_text(phoneme_text),
                        consonant,
                        consonant_length,
                        mora.vowel().phoneme().into(),
                        0.,
                        0.,
                    )
                })
                .collect();

            let closes_group = phrase_index + 1 == phrases.len();
            let pause_mora = (followed_by_group && closes_group)
                .then(|| MoraModel::new("、".into(), None, None, "pau".into(), 0., 0.));

            collected.push(AccentPhraseModel::new(
                moras,
                *accent_phrase.accent(),
                pause_mora,
                *accent_phrase.is_interrogative(),
            ));
        }
    }

    collected
}

fn mora_to_text(mora: impl AsRef<str>) -> String {
eyr1n marked this conversation as resolved.
Show resolved Hide resolved
let last_char = mora.as_ref().chars().last().unwrap();
let mora = if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) {
format!(
"{}{}",
&mora.as_ref()[0..mora.as_ref().len() - 1],
last_char.to_lowercase()
)
} else {
mora.as_ref().to_string()
};
// もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる
engine::mora2text(&mora).to_string()
}
Loading