Skip to content

Commit

Permalink
mutabilityとasyncnessを仕上げる (#553)
Browse files Browse the repository at this point in the history
  • Loading branch information
qryxip authored Aug 17, 2023
1 parent 2ffd87e commit f4868ac
Show file tree
Hide file tree
Showing 16 changed files with 567 additions and 403 deletions.
20 changes: 20 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ async_zip.workspace = true
cfg-if = "1.0.0"
derive-getters.workspace = true
derive-new = "0.5.9"
derive_more = "0.99.17"
easy-ext.workspace = true
fs-err.workspace = true
futures = "0.3.26"
Expand Down
4 changes: 0 additions & 4 deletions crates/voicevox_core/src/engine/synthesis_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ impl SynthesisEngine {
&self.inference_core
}

pub fn inference_core_mut(&mut self) -> &mut InferenceCore {
&mut self.inference_core
}

pub async fn create_accent_phrases(
&self,
text: &str,
Expand Down
75 changes: 44 additions & 31 deletions crates/voicevox_core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,15 @@ pub enum Error {
#[error("{}", base_error_message(VOICEVOX_RESULT_GPU_SUPPORT_ERROR))]
GpuSupport,

#[error("{} ({}): {source}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR), path.display())]
LoadModel {
path: PathBuf,
#[source]
source: anyhow::Error,
},
#[error("{} ({})", base_error_message(VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR), path.display())]
AlreadyLoadedModel { path: PathBuf },
#[error(transparent)]
LoadModel(#[from] LoadModelError),

#[error(
"{} ({model_id:?})",
base_error_message(VOICEVOX_RESULT_UNLOADED_MODEL_ERROR)
)]
UnloadedModel { model_id: VoiceModelId },

#[error(
"{}({path}):{source}",
base_error_message(VOICEVOX_RESULT_OPEN_FILE_ERROR)
)]
OpenFile {
path: PathBuf,
#[source]
source: anyhow::Error,
},

#[error(
"{}({path}):{source}",
base_error_message(VOICEVOX_RESULT_VVM_MODEL_READ_ERROR)
)]
VvmRead {
path: PathBuf,
#[source]
source: anyhow::Error,
},

#[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_METAS_ERROR))]
LoadMetas(#[source] anyhow::Error),

#[error(
"{},{0}",
base_error_message(VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR)
Expand Down Expand Up @@ -111,6 +82,48 @@ pub enum Error {
InvalidWord(InvalidWordError),
}

/// Convenience alias for operations that can fail with a [`LoadModelError`].
pub(crate) type LoadModelResult<T> = std::result::Result<T, LoadModelError>;

/// Error raised when loading a voice model fails.
///
/// Carries the offending `path`, a [`LoadModelErrorKind`] describing what went
/// wrong, and an optional underlying cause that joins the `Error::source` chain.
#[derive(Error, Debug)]
// Rendered as "`{path}` の読み込みに失敗しました: {kind}[: {cause}]"; the trailing
// cause segment is appended only when `source` is `Some`.
#[error(
    "`{path}`の読み込みに失敗しました: {context}{}",
    source.as_ref().map(|e| format!(": {e}")).unwrap_or_default())
]
pub struct LoadModelError {
    // Path of the model file that failed to load.
    pub(crate) path: PathBuf,
    // What kind of failure occurred; exposed read-only via `LoadModelError::context`.
    pub(crate) context: LoadModelErrorKind,
    // Underlying error, if any; `#[source]` wires it into `std::error::Error::source`.
    #[source]
    pub(crate) source: Option<anyhow::Error>,
}

impl LoadModelError {
    /// Returns the kind of failure that occurred while loading the model.
    pub fn context(&self) -> &LoadModelErrorKind {
        let Self { context, .. } = self;
        context
    }
}

/// The specific kind of failure behind a [`LoadModelError`].
#[derive(derive_more::Display, Debug)]
pub enum LoadModelErrorKind {
    /// The file could not be opened as a ZIP archive.
    //#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR)")]
    #[display(fmt = "ZIPファイルとして開くことができませんでした")]
    OpenZipFile,
    /// An entry inside the ZIP archive could not be read.
    //#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR)")]
    // FIX: interpolate the carried `filename` field (as the sibling variants do
    // with `{id}`) so the message names the entry that failed, instead of a
    // literal placeholder that left the field unused.
    #[display(fmt = "`{filename}`を読み取れませんでした")]
    ReadZipEntry { filename: String },
    /// A voice model with the same id is already loaded.
    //#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR)")]
    #[display(fmt = "モデル`{id}`は既に読み込まれています")]
    ModelAlreadyLoaded { id: VoiceModelId },
    /// A style with the same id is already loaded.
    //#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR)")]
    #[display(fmt = "スタイル`{id}`は既に読み込まれています")]
    StyleAlreadyLoaded { id: StyleId },
    /// The model data itself was malformed or unreadable.
    #[display(
        fmt = "{}",
        "base_error_message(VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR)"
    )]
    InvalidModelData,
}

fn base_error_message(result_code: VoicevoxResultCode) -> &'static str {
let c_message: &'static str = crate::result_code::error_result_to_message(result_code);
&c_message[..(c_message.len() - 1)]
Expand Down
97 changes: 38 additions & 59 deletions crates/voicevox_core/src/inference_core.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use self::status::*;
use super::*;
use onnxruntime::{
ndarray,
session::{AnyArray, NdArray},
};
use onnxruntime::{ndarray, session::NdArray};

const PHONEME_LENGTH_MINIMAL: f32 = 0.01;

Expand All @@ -18,7 +15,7 @@ impl InferenceCore {
load_all_models: bool,
) -> Result<Self> {
if !use_gpu || Self::can_support_gpu_feature()? {
let mut status = Status::new(use_gpu, cpu_num_threads);
let status = Status::new(use_gpu, cpu_num_threads);

if load_all_models {
for model in &VoiceModel::get_all_models().await? {
Expand All @@ -43,14 +40,14 @@ impl InferenceCore {
}
}

pub async fn load_model(&mut self, model: &VoiceModel) -> Result<()> {
pub async fn load_model(&self, model: &VoiceModel) -> Result<()> {
self.status.load_model(model).await
}

pub fn unload_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
pub fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
self.status.unload_model(voice_model_id)
}
pub fn metas(&self) -> &VoiceModelMeta {
pub fn metas(&self) -> VoiceModelMeta {
self.status.metas()
}

Expand All @@ -71,21 +68,15 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let (model_id, model_inner_id) = self
.status
.id_relations
.get(&style_id)
.ok_or(Error::InvalidStyleId { style_id })?;

let mut phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));
let (model_id, model_inner_id) = self.status.ids_for(style_id)?;

let input_tensors: Vec<&mut dyn AnyArray> =
vec![&mut phoneme_vector_array, &mut speaker_id_array];
let phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));

let mut output = self
.status
.predict_duration_session_run(model_id, input_tensors)?;
.predict_duration_session_run(&model_id, phoneme_vector_array, speaker_id_array)
.await?;

for output_item in output.iter_mut() {
if *output_item < PHONEME_LENGTH_MINIMAL {
Expand All @@ -112,37 +103,31 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let (model_id, model_inner_id) = self
.status
.id_relations
.get(&style_id)
.ok_or(Error::InvalidStyleId { style_id })?;

let mut length_array = NdArray::new(ndarray::arr0(length as i64));
let mut vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
let mut consonant_phoneme_vector_array =
NdArray::new(ndarray::arr1(consonant_phoneme_vector));
let mut start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
let mut end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
let mut start_accent_phrase_vector_array =
let (model_id, model_inner_id) = self.status.ids_for(style_id)?;

let length_array = NdArray::new(ndarray::arr0(length as i64));
let vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
let consonant_phoneme_vector_array = NdArray::new(ndarray::arr1(consonant_phoneme_vector));
let start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
let end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
let start_accent_phrase_vector_array =
NdArray::new(ndarray::arr1(start_accent_phrase_vector));
let mut end_accent_phrase_vector_array =
NdArray::new(ndarray::arr1(end_accent_phrase_vector));
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> = vec![
&mut length_array,
&mut vowel_phoneme_vector_array,
&mut consonant_phoneme_vector_array,
&mut start_accent_vector_array,
&mut end_accent_vector_array,
&mut start_accent_phrase_vector_array,
&mut end_accent_phrase_vector_array,
&mut speaker_id_array,
];
let end_accent_phrase_vector_array = NdArray::new(ndarray::arr1(end_accent_phrase_vector));
let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));

self.status
.predict_intonation_session_run(model_id, input_tensors)
.predict_intonation_session_run(
&model_id,
length_array,
vowel_phoneme_vector_array,
consonant_phoneme_vector_array,
start_accent_vector_array,
end_accent_vector_array,
start_accent_phrase_vector_array,
end_accent_phrase_vector_array,
speaker_id_array,
)
.await
}

pub async fn decode(
Expand All @@ -157,11 +142,7 @@ impl InferenceCore {
return Err(Error::InvalidStyleId { style_id });
}

let (model_id, model_inner_id) = self
.status
.id_relations
.get(&style_id)
.ok_or(Error::InvalidStyleId { style_id })?;
let (model_id, model_inner_id) = self.status.ids_for(style_id)?;

// 音が途切れてしまうのを避けるworkaround処理が入っている
// TODO: 改善したらここのpadding処理を取り除く
Expand All @@ -179,23 +160,21 @@ impl InferenceCore {
padding_size,
);

let mut f0_array = NdArray::new(
let f0_array = NdArray::new(
ndarray::arr1(&f0_with_padding)
.into_shape([length_with_padding, 1])
.unwrap(),
);
let mut phoneme_array = NdArray::new(
let phoneme_array = NdArray::new(
ndarray::arr1(&phoneme_with_padding)
.into_shape([length_with_padding, phoneme_size])
.unwrap(),
);
let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));

let input_tensors: Vec<&mut dyn AnyArray> =
vec![&mut f0_array, &mut phoneme_array, &mut speaker_id_array];
let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));

self.status
.decode_session_run(model_id, input_tensors)
.decode_session_run(&model_id, f0_array, phoneme_array, speaker_id_array)
.await
.map(|output| Self::trim_padding_from_output(output, padding_size))
}

Expand Down
32 changes: 15 additions & 17 deletions crates/voicevox_core/src/result_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,10 @@ pub enum VoicevoxResultCode {
VOICEVOX_RESULT_OK = 0,
/// open_jtalk辞書ファイルが読み込まれていない
VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR = 1,
/// modelの読み込みに失敗した
VOICEVOX_RESULT_LOAD_MODEL_ERROR = 2,
/// サポートされているデバイス情報取得に失敗した
VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR = 3,
/// GPUモードがサポートされていない
VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4,
/// メタ情報読み込みに失敗した
VOICEVOX_RESULT_LOAD_METAS_ERROR = 5,
/// 無効なstyle_idが指定された
VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR = 6,
/// 無効なmodel_idが指定された
Expand All @@ -35,12 +31,16 @@ pub enum VoicevoxResultCode {
VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR = 14,
/// 無効なAccentPhrase
VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR = 15,
/// ファイルオープンエラー
VOICEVOX_RESULT_OPEN_FILE_ERROR = 16,
/// Modelを読み込めなかった
VOICEVOX_RESULT_VVM_MODEL_READ_ERROR = 17,
/// すでに読み込まれているModelを読み込もうとした
VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR = 18,
/// ZIPファイルを開くことに失敗した
VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR = 16,
/// ZIP内のファイルが読めなかった
VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR = 17,
/// すでに読み込まれている音声モデルを読み込もうとした
VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR = 18,
/// すでに読み込まれているスタイルを読み込もうとした
VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR = 26,
/// 無効なモデルデータ
VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR = 27,
/// Modelが読み込まれていない
VOICEVOX_RESULT_UNLOADED_MODEL_ERROR = 19,
/// ユーザー辞書を読み込めなかった
Expand All @@ -64,8 +64,6 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => {
"OpenJTalkの辞書が読み込まれていません\0"
}
VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0",
VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0",

VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0",
VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR => {
Expand All @@ -85,11 +83,11 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
}
VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR => "無効なaudio_queryです\0",
VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR => "無効なaccent_phraseです\0",
VOICEVOX_RESULT_OPEN_FILE_ERROR => "ファイルオープンに失敗しました\0",
VOICEVOX_RESULT_VVM_MODEL_READ_ERROR => "Modelを読み込めませんでした\0",
VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR => {
"すでに読み込まれているModelを読み込もうとしました\0"
}
VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR => "ZIPファイルのオープンに失敗しました\0",
VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR => "ZIP内のファイルを読むことができませんでした\0",
VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR => "同じIDのモデルを読むことはできません\0",
VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR => "同じIDのスタイルを読むことはできません\0",
VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR => "モデルデータを読むことができませんでした\0",
VOICEVOX_RESULT_UNLOADED_MODEL_ERROR => "Modelが読み込まれていません\0",
VOICEVOX_RESULT_LOAD_USER_DICT_ERROR => "ユーザー辞書を読み込めませんでした\0",
VOICEVOX_RESULT_SAVE_USER_DICT_ERROR => "ユーザー辞書を書き込めませんでした\0",
Expand Down
Loading

0 comments on commit f4868ac

Please sign in to comment.