diff --git a/Cargo.lock b/Cargo.lock
index 595e0e30f..a4460dae4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -730,6 +730,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
 
+[[package]]
+name = "convert_case"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
+
 [[package]]
 name = "cookie"
 version = "0.14.4"
@@ -956,6 +962,19 @@ dependencies = [
  "syn 1.0.102",
 ]
 
+[[package]]
+name = "derive_more"
+version = "0.99.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
+dependencies = [
+ "convert_case",
+ "proc-macro2",
+ "quote",
+ "rustc_version 0.4.0",
+ "syn 1.0.102",
+]
+
 [[package]]
 name = "diff"
 version = "0.1.13"
@@ -3942,6 +3961,7 @@ dependencies = [
  "cfg-if",
  "derive-getters",
  "derive-new",
+ "derive_more",
  "easy-ext",
  "flate2",
  "fs-err",
diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml
index 05949fa90..c542a9860 100644
--- a/crates/voicevox_core/Cargo.toml
+++ b/crates/voicevox_core/Cargo.toml
@@ -14,6 +14,7 @@ async_zip.workspace = true
 cfg-if = "1.0.0"
 derive-getters.workspace = true
 derive-new = "0.5.9"
+derive_more = "0.99.17"
 easy-ext.workspace = true
 fs-err.workspace = true
 futures = "0.3.26"
diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs
index 0adce4fe2..96d2353e4 100644
--- a/crates/voicevox_core/src/engine/synthesis_engine.rs
+++ b/crates/voicevox_core/src/engine/synthesis_engine.rs
@@ -30,10 +30,6 @@ impl SynthesisEngine {
         &self.inference_core
     }
 
-    pub fn inference_core_mut(&mut self) -> &mut InferenceCore {
-        &mut self.inference_core
-    }
-
     pub async fn create_accent_phrases(
         &self,
         text: &str,
diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs
index 7ad258a34..12883f2ab 100644
--- a/crates/voicevox_core/src/error.rs
+++ b/crates/voicevox_core/src/error.rs
@@ -21,14 +21,8 @@ pub enum Error {
     #[error("{}", base_error_message(VOICEVOX_RESULT_GPU_SUPPORT_ERROR))]
     GpuSupport,
 
-    #[error("{} ({}): {source}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR), path.display())]
-    LoadModel {
-        path: PathBuf,
-        #[source]
-        source: anyhow::Error,
-    },
-    #[error("{} ({})", base_error_message(VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR), path.display())]
-    AlreadyLoadedModel { path: PathBuf },
+    #[error(transparent)]
+    LoadModel(#[from] LoadModelError),
 
     #[error(
         "{} ({model_id:?})",
@@ -36,29 +30,6 @@ pub enum Error {
     )]
     UnloadedModel { model_id: VoiceModelId },
 
-    #[error(
-        "{}({path}):{source}",
-        base_error_message(VOICEVOX_RESULT_OPEN_FILE_ERROR)
-    )]
-    OpenFile {
-        path: PathBuf,
-        #[source]
-        source: anyhow::Error,
-    },
-
-    #[error(
-        "{}({path}):{source}",
-        base_error_message(VOICEVOX_RESULT_VVM_MODEL_READ_ERROR)
-    )]
-    VvmRead {
-        path: PathBuf,
-        #[source]
-        source: anyhow::Error,
-    },
-
-    #[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_METAS_ERROR))]
-    LoadMetas(#[source] anyhow::Error),
-
     #[error(
         "{},{0}",
         base_error_message(VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR)
@@ -111,6 +82,48 @@ pub enum Error {
     InvalidWord(InvalidWordError),
 }
 
+pub(crate) type LoadModelResult<T> = std::result::Result<T, LoadModelError>;
+
+/// Error for a failed voice-model load: the model `path`, the failure kind (`context`), and an optional underlying `source` whose message is appended to the display text when present.
+#[derive(Error, Debug)]
+#[error(
+    "`{path}`の読み込みに失敗しました: {context}{}",
+    source.as_ref().map(|e| format!(": {e}")).unwrap_or_default())
+]
+pub struct LoadModelError {
+    pub(crate) path: PathBuf,
+    pub(crate) context: LoadModelErrorKind,
+    #[source]
+    pub(crate) source: Option<anyhow::Error>,
+}
+
+impl LoadModelError {
+    pub fn context(&self) -> &LoadModelErrorKind {
+        &self.context
+    }
+}
+
+#[derive(derive_more::Display, Debug)]
+pub enum LoadModelErrorKind {
+    // TODO(review): presumably to be replaced by `#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR)")]` — confirm.
+    #[display(fmt = "ZIPファイルとして開くことができませんでした")]
+    OpenZipFile,
+    // TODO(review): presumably to be replaced by `#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR)")]` — confirm.
+    #[display(fmt = "`{filename}`を読み取れませんでした")]
+    ReadZipEntry { filename: String },
+    // TODO(review): presumably to be replaced by `#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR)")]` — confirm.
+    #[display(fmt = "モデル`{id}`は既に読み込まれています")]
+    ModelAlreadyLoaded { id: VoiceModelId },
+    // TODO(review): presumably to be replaced by `#[display(fmt = "{}", "base_error_message(VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR)")]` — confirm.
+    #[display(fmt = "スタイル`{id}`は既に読み込まれています")]
+    StyleAlreadyLoaded { id: StyleId },
+    #[display(
+        fmt = "{}",
+        "base_error_message(VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR)"
+    )]
+    InvalidModelData,
+}
+
 fn base_error_message(result_code: VoicevoxResultCode) -> &'static str {
     let c_message: &'static str = crate::result_code::error_result_to_message(result_code);
     &c_message[..(c_message.len() - 1)]
diff --git a/crates/voicevox_core/src/inference_core.rs b/crates/voicevox_core/src/inference_core.rs
index 66ec3bb18..d88943fc8 100644
--- a/crates/voicevox_core/src/inference_core.rs
+++ b/crates/voicevox_core/src/inference_core.rs
@@ -1,9 +1,6 @@
 use self::status::*;
 use super::*;
-use onnxruntime::{
-    ndarray,
-    session::{AnyArray, NdArray},
-};
+use onnxruntime::{ndarray, session::NdArray};
 
 const PHONEME_LENGTH_MINIMAL: f32 = 0.01;
 
@@ -18,7 +15,7 @@ impl InferenceCore {
         load_all_models: bool,
     ) -> Result<Self> {
         if !use_gpu || Self::can_support_gpu_feature()? {
-            let mut status = Status::new(use_gpu, cpu_num_threads);
+            let status = Status::new(use_gpu, cpu_num_threads);
 
             if load_all_models {
                 for model in &VoiceModel::get_all_models().await? {
@@ -43,14 +40,14 @@ impl InferenceCore {
         }
     }
 
-    pub async fn load_model(&mut self, model: &VoiceModel) -> Result<()> {
+    pub async fn load_model(&self, model: &VoiceModel) -> Result<()> {
         self.status.load_model(model).await
     }
 
-    pub fn unload_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
+    pub fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
         self.status.unload_model(voice_model_id)
     }
-    pub fn metas(&self) -> &VoiceModelMeta {
+    pub fn metas(&self) -> VoiceModelMeta {
         self.status.metas()
     }
 
@@ -71,21 +68,15 @@ impl InferenceCore {
             return Err(Error::InvalidStyleId { style_id });
         }
 
-        let (model_id, model_inner_id) = self
-            .status
-            .id_relations
-            .get(&style_id)
-            .ok_or(Error::InvalidStyleId { style_id })?;
-
-        let mut phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
-        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));
+        let (model_id, model_inner_id) = self.status.ids_for(style_id)?;
 
-        let input_tensors: Vec<&mut dyn AnyArray> =
-            vec![&mut phoneme_vector_array, &mut speaker_id_array];
+        let phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
+        let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));
 
         let mut output = self
             .status
-            .predict_duration_session_run(model_id, input_tensors)?;
+            .predict_duration_session_run(&model_id, phoneme_vector_array, speaker_id_array)
+            .await?;
 
         for output_item in output.iter_mut() {
             if *output_item < PHONEME_LENGTH_MINIMAL {
@@ -112,37 +103,31 @@ impl InferenceCore {
             return Err(Error::InvalidStyleId { style_id });
         }
 
-        let (model_id, model_inner_id) = self
-            .status
-            .id_relations
-            .get(&style_id)
-            .ok_or(Error::InvalidStyleId { style_id })?;
-
-        let mut length_array = NdArray::new(ndarray::arr0(length as i64));
-        let mut vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
-        let mut consonant_phoneme_vector_array =
-            NdArray::new(ndarray::arr1(consonant_phoneme_vector));
-        let mut start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
-        let mut end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
-        let mut start_accent_phrase_vector_array =
+        let (model_id, model_inner_id) = self.status.ids_for(style_id)?;
+
+        let length_array = NdArray::new(ndarray::arr0(length as i64));
+        let vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
+        let consonant_phoneme_vector_array = NdArray::new(ndarray::arr1(consonant_phoneme_vector));
+        let start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
+        let end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
+        let start_accent_phrase_vector_array =
             NdArray::new(ndarray::arr1(start_accent_phrase_vector));
-        let mut end_accent_phrase_vector_array =
-            NdArray::new(ndarray::arr1(end_accent_phrase_vector));
-        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));
-
-        let input_tensors: Vec<&mut dyn AnyArray> = vec![
-            &mut length_array,
-            &mut vowel_phoneme_vector_array,
-            &mut consonant_phoneme_vector_array,
-            &mut start_accent_vector_array,
-            &mut end_accent_vector_array,
-            &mut start_accent_phrase_vector_array,
-            &mut end_accent_phrase_vector_array,
-            &mut speaker_id_array,
-        ];
+        let end_accent_phrase_vector_array = NdArray::new(ndarray::arr1(end_accent_phrase_vector));
+        let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));
 
         self.status
-            .predict_intonation_session_run(model_id, input_tensors)
+            .predict_intonation_session_run(
+                &model_id,
+                length_array,
+                vowel_phoneme_vector_array,
+                consonant_phoneme_vector_array,
+                start_accent_vector_array,
+                end_accent_vector_array,
+                start_accent_phrase_vector_array,
+                end_accent_phrase_vector_array,
+                speaker_id_array,
+            )
+            .await
     }
 
     pub async fn decode(
@@ -157,11 +142,7 @@ impl InferenceCore {
             return Err(Error::InvalidStyleId { style_id });
         }
 
-        let (model_id, model_inner_id) = self
-            .status
-            .id_relations
-            .get(&style_id)
-            .ok_or(Error::InvalidStyleId { style_id })?;
+        let (model_id, model_inner_id) = self.status.ids_for(style_id)?;
 
         // 音が途切れてしまうのを避けるworkaround処理が入っている
         // TODO: 改善したらここのpadding処理を取り除く
@@ -179,23 +160,21 @@ impl InferenceCore {
             padding_size,
         );
 
-        let mut f0_array = NdArray::new(
+        let f0_array = NdArray::new(
             ndarray::arr1(&f0_with_padding)
                 .into_shape([length_with_padding, 1])
                 .unwrap(),
         );
-        let mut phoneme_array = NdArray::new(
+        let phoneme_array = NdArray::new(
             ndarray::arr1(&phoneme_with_padding)
                 .into_shape([length_with_padding, phoneme_size])
                 .unwrap(),
         );
-        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id() as i64]));
-
-        let input_tensors: Vec<&mut dyn AnyArray> =
-            vec![&mut f0_array, &mut phoneme_array, &mut speaker_id_array];
+        let speaker_id_array = NdArray::new(ndarray::arr1(&[model_inner_id.raw_id().into()]));
 
         self.status
-            .decode_session_run(model_id, input_tensors)
+            .decode_session_run(&model_id, f0_array, phoneme_array, speaker_id_array)
+            .await
             .map(|output| Self::trim_padding_from_output(output, padding_size))
     }
 
diff --git a/crates/voicevox_core/src/result_code.rs b/crates/voicevox_core/src/result_code.rs
index 07cf05e18..541c65a79 100644
--- a/crates/voicevox_core/src/result_code.rs
+++ b/crates/voicevox_core/src/result_code.rs
@@ -11,14 +11,10 @@ pub enum VoicevoxResultCode {
     VOICEVOX_RESULT_OK = 0,
     /// open_jtalk辞書ファイルが読み込まれていない
     VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR = 1,
-    /// modelの読み込みに失敗した
-    VOICEVOX_RESULT_LOAD_MODEL_ERROR = 2,
     /// サポートされているデバイス情報取得に失敗した
     VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR = 3,
     /// GPUモードがサポートされていない
     VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4,
-    /// メタ情報読み込みに失敗した
-    VOICEVOX_RESULT_LOAD_METAS_ERROR = 5,
     /// 無効なstyle_idが指定された
     VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR = 6,
     /// 無効なmodel_idが指定された
@@ -35,12 +31,16 @@ pub enum VoicevoxResultCode {
     VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR = 14,
     /// 無効なAccentPhrase
     VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR = 15,
-    /// ファイルオープンエラー
-    VOICEVOX_RESULT_OPEN_FILE_ERROR = 16,
-    /// Modelを読み込めなかった
-    VOICEVOX_RESULT_VVM_MODEL_READ_ERROR = 17,
-    /// すでに読み込まれているModelを読み込もうとした
-    VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR = 18,
+    /// ZIPファイルを開くことに失敗した
+    VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR = 16,
+    /// ZIP内のファイルが読めなかった
+    VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR = 17,
+    /// すでに読み込まれている音声モデルを読み込もうとした
+    VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR = 18,
+    /// すでに読み込まれているスタイルを読み込もうとした
+    VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR = 26,
+    /// 無効なモデルデータ
+    VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR = 27,
     /// Modelが読み込まれていない
     VOICEVOX_RESULT_UNLOADED_MODEL_ERROR = 19,
     /// ユーザー辞書を読み込めなかった
@@ -64,8 +64,6 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
         VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => {
             "OpenJTalkの辞書が読み込まれていません\0"
         }
-        VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0",
-        VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0",
 
         VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0",
         VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR => {
@@ -85,11 +83,11 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
         }
         VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR => "無効なaudio_queryです\0",
         VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR => "無効なaccent_phraseです\0",
-        VOICEVOX_RESULT_OPEN_FILE_ERROR => "ファイルオープンに失敗しました\0",
-        VOICEVOX_RESULT_VVM_MODEL_READ_ERROR => "Modelを読み込めませんでした\0",
-        VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR => {
-            "すでに読み込まれているModelを読み込もうとしました\0"
-        }
+        VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR => "ZIPファイルのオープンに失敗しました\0",
+        VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR => "ZIP内のファイルを読むことができませんでした\0",
+        VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR => "同じIDのモデルを読むことはできません\0",
+        VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR => "同じIDのスタイルを読むことはできません\0",
+        VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR => "モデルデータを読むことができませんでした\0",
         VOICEVOX_RESULT_UNLOADED_MODEL_ERROR => "Modelが読み込まれていません\0",
         VOICEVOX_RESULT_LOAD_USER_DICT_ERROR => "ユーザー辞書を読み込めませんでした\0",
         VOICEVOX_RESULT_SAVE_USER_DICT_ERROR => "ユーザー辞書を書き込めませんでした\0",
diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs
index 12f7e1bd6..a92ffae8e 100644
--- a/crates/voicevox_core/src/status.rs
+++ b/crates/voicevox_core/src/status.rs
@@ -1,11 +1,13 @@
 use super::*;
+use itertools::iproduct;
 use once_cell::sync::Lazy;
 use onnxruntime::{
     environment::Environment,
-    session::{AnyArray, Session},
+    ndarray::{Ix0, Ix1, Ix2},
+    session::{NdArray, Session},
     GraphOptimizationLevel, LoggingLevel,
 };
-use std::sync::Mutex;
+use std::sync::Arc;
 use std::{env, path::Path};
 use tracing::error;
 
@@ -19,18 +21,9 @@ cfg_if! {
 use std::collections::BTreeMap;
 
 pub struct Status {
-    models: StatusModels,
-    merged_metas: VoiceModelMeta,
+    loaded_models: std::sync::Mutex<LoadedModels>,
     light_session_options: SessionOptions, // 軽いモデルはこちらを使う
     heavy_session_options: SessionOptions, // 重いモデルはこちらを使う
-    pub id_relations: BTreeMap<StyleId, (VoiceModelId, ModelInnerId)>, // FIXME: pubはやめたい
-}
-
-struct StatusModels {
-    metas: BTreeMap<VoiceModelId, VoiceModelMeta>,
-    predict_duration: BTreeMap<VoiceModelId, Mutex<Session<'static>>>,
-    predict_intonation: BTreeMap<VoiceModelId, Mutex<Session<'static>>>,
-    decode: BTreeMap<VoiceModelId, Mutex<Session<'static>>>,
 }
 
 #[derive(new, Getters)]
@@ -58,38 +51,21 @@ static ENVIRONMENT: Lazy<Environment> = Lazy::new(|| {
         .unwrap()
 });
 
-#[allow(unsafe_code)]
-unsafe impl Send for Status {}
-
-#[allow(unsafe_code)]
-unsafe impl Sync for Status {}
-
 impl Status {
     pub fn new(use_gpu: bool, cpu_num_threads: u16) -> Self {
         Self {
-            models: StatusModels {
-                metas: BTreeMap::new(),
-                predict_duration: BTreeMap::new(),
-                predict_intonation: BTreeMap::new(),
-                decode: BTreeMap::new(),
-            },
-            merged_metas: VoiceModelMeta::default(),
+            loaded_models: Default::default(),
             light_session_options: SessionOptions::new(cpu_num_threads, false),
             heavy_session_options: SessionOptions::new(cpu_num_threads, use_gpu),
-            id_relations: BTreeMap::default(),
         }
     }
 
-    pub async fn load_model(&mut self, model: &VoiceModel) -> Result<()> {
-        for speaker in model.metas().iter() {
-            for style in speaker.styles().iter() {
-                if self.id_relations.contains_key(style.id()) {
-                    Err(Error::AlreadyLoadedModel {
-                        path: model.path().clone(),
-                    })?;
-                }
-            }
-        }
+    pub async fn load_model(&self, model: &VoiceModel) -> Result<()> {
+        self.loaded_models
+            .lock()
+            .unwrap()
+            .ensure_acceptable(model)?;
+
         let models = model.read_inference_models().await?;
 
         let predict_duration_session = self.new_session(
@@ -107,79 +83,37 @@ impl Status {
             &self.heavy_session_options,
             model.path(),
         )?;
-        self.models
-            .metas
-            .insert(model.id().clone(), model.metas().clone());
-
-        for speaker in model.metas().iter() {
-            for style in speaker.styles().iter() {
-                self.id_relations.insert(
-                    *style.id(),
-                    (model.id().clone(), model.model_inner_id_for(*style.id())),
-                );
-            }
-        }
-        self.set_metas();
-
-        self.models
-            .predict_duration
-            .insert(model.id().clone(), Mutex::new(predict_duration_session));
-        self.models
-            .predict_intonation
-            .insert(model.id().clone(), Mutex::new(predict_intonation_session));
-
-        self.models
-            .decode
-            .insert(model.id().clone(), Mutex::new(decode_model));
 
+        self.loaded_models.lock().unwrap().insert(
+            model,
+            predict_duration_session,
+            predict_intonation_session,
+            decode_model,
+        )?;
         Ok(())
     }
 
-    pub fn unload_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
-        if self.is_loaded_model(voice_model_id) {
-            self.models.predict_intonation.remove(voice_model_id);
-            self.models.predict_duration.remove(voice_model_id);
-            self.models.decode.remove(voice_model_id);
-
-            let remove_style_ids = self
-                .id_relations
-                .iter()
-                .filter(|&(_, (loaded_model_id, _))| loaded_model_id == voice_model_id)
-                .map(|(&style_id, _)| style_id)
-                .collect::<Vec<_>>();
-
-            for style_id in remove_style_ids.iter() {
-                self.id_relations.remove(style_id);
-            }
-            self.set_metas();
-            Ok(())
-        } else {
-            Err(Error::UnloadedModel {
-                model_id: voice_model_id.clone(),
-            })
-        }
+    pub fn unload_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
+        self.loaded_models.lock().unwrap().remove(voice_model_id)
     }
 
-    fn set_metas(&mut self) {
-        let mut meta = VoiceModelMeta::default();
-        for m in self.models.metas.values() {
-            meta.extend_from_slice(m);
-        }
-        self.merged_metas = meta;
+    pub fn metas(&self) -> VoiceModelMeta {
+        self.loaded_models.lock().unwrap().metas()
     }
 
-    pub fn metas(&self) -> &VoiceModelMeta {
-        &self.merged_metas
+    pub(crate) fn ids_for(&self, style_id: StyleId) -> Result<(VoiceModelId, ModelInnerId)> {
+        self.loaded_models.lock().unwrap().ids_for(style_id)
     }
 
     pub fn is_loaded_model(&self, voice_model_id: &VoiceModelId) -> bool {
-        self.models.predict_duration.contains_key(voice_model_id)
-            && self.models.predict_intonation.contains_key(voice_model_id)
-            && self.models.decode.contains_key(voice_model_id)
+        self.loaded_models
+            .lock()
+            .unwrap()
+            .contains_voice_model(voice_model_id)
     }
 
     pub fn is_loaded_model_by_style_id(&self, style_id: StyleId) -> bool {
-        self.id_relations.contains_key(&style_id)
+        self.loaded_models.lock().unwrap().contains_style(style_id)
     }
 
     fn new_session(
@@ -187,11 +121,12 @@ impl Status {
         model: &[u8],
         session_options: &SessionOptions,
         path: impl AsRef<Path>,
-    ) -> Result<Session<'static>> {
+    ) -> LoadModelResult<Session<'static>> {
         self.new_session_from_bytes(|| model_file::decrypt(model), session_options)
-            .map_err(|source| Error::LoadModel {
-                path: path.as_ref().into(),
-                source,
+            .map_err(|source| LoadModelError {
+                path: path.as_ref().to_owned(),
+                context: LoadModelErrorKind::InvalidModelData,
+                source: Some(source),
             })
     }
 
@@ -226,62 +161,291 @@ impl Status {
     }
 
     pub fn validate_speaker_id(&self, style_id: StyleId) -> bool {
-        self.id_relations.contains_key(&style_id)
+        self.is_loaded_model_by_style_id(style_id)
     }
 
-    pub fn predict_duration_session_run(
+    /// # Panics
+    ///
+    /// Panics if `self` does not contain `model_id`, if a mutex is poisoned, or if the blocking inference task panics or is cancelled.
+    pub async fn predict_duration_session_run(
         &self,
         model_id: &VoiceModelId,
-        inputs: Vec<&mut dyn AnyArray>,
+        mut phoneme_vector_array: NdArray<i64, Ix1>,
+        mut speaker_id_array: NdArray<i64, Ix1>,
     ) -> Result<Vec<f32>> {
-        if let Some(model) = self.models.predict_duration.get(model_id) {
-            if let Ok(output_tensors) = model.lock().unwrap().run(inputs) {
-                Ok(output_tensors[0].as_slice().unwrap().to_owned())
-            } else {
-                Err(Error::InferenceFailed)
-            }
-        } else {
-            Err(Error::InvalidModelId {
-                model_id: model_id.clone(),
-            })
-        }
+        let predict_duration = self.loaded_models.lock().unwrap().get(
+            model_id,
+            |SessionSet {
+                 predict_duration, ..
+             }| predict_duration,
+        );
+
+        tokio::task::spawn_blocking(move || {
+            let mut predict_duration = predict_duration.lock().unwrap();
+
+            let output_tensors = predict_duration
+                .run(vec![&mut phoneme_vector_array, &mut speaker_id_array])
+                .map_err(|_| Error::InferenceFailed)?;
+            Ok(output_tensors[0].as_slice().unwrap().to_owned())
+        })
+        .await
+        .unwrap()
     }
 
-    pub fn predict_intonation_session_run(
+    /// # Panics
+    ///
+    /// Panics if `self` does not contain `model_id`, if a mutex is poisoned, or if the blocking inference task panics or is cancelled.
+    #[allow(clippy::too_many_arguments)]
+    pub async fn predict_intonation_session_run(
         &self,
         model_id: &VoiceModelId,
-        inputs: Vec<&mut dyn AnyArray>,
+        mut length_array: NdArray<i64, Ix0>,
+        mut vowel_phoneme_vector_array: NdArray<i64, Ix1>,
+        mut consonant_phoneme_vector_array: NdArray<i64, Ix1>,
+        mut start_accent_vector_array: NdArray<i64, Ix1>,
+        mut end_accent_vector_array: NdArray<i64, Ix1>,
+        mut start_accent_phrase_vector_array: NdArray<i64, Ix1>,
+        mut end_accent_phrase_vector_array: NdArray<i64, Ix1>,
+        mut speaker_id_array: NdArray<i64, Ix1>,
     ) -> Result<Vec<f32>> {
-        if let Some(model) = self.models.predict_intonation.get(model_id) {
-            if let Ok(output_tensors) = model.lock().unwrap().run(inputs) {
-                Ok(output_tensors[0].as_slice().unwrap().to_owned())
-            } else {
-                Err(Error::InferenceFailed)
-            }
-        } else {
-            Err(Error::InvalidModelId {
-                model_id: model_id.clone(),
-            })
-        }
+        let predict_intonation = self.loaded_models.lock().unwrap().get(
+            model_id,
+            |SessionSet {
+                 predict_intonation, ..
+             }| predict_intonation,
+        );
+
+        tokio::task::spawn_blocking(move || {
+            let mut predict_intonation = predict_intonation.lock().unwrap();
+
+            let output_tensors = predict_intonation
+                .run(vec![
+                    &mut length_array,
+                    &mut vowel_phoneme_vector_array,
+                    &mut consonant_phoneme_vector_array,
+                    &mut start_accent_vector_array,
+                    &mut end_accent_vector_array,
+                    &mut start_accent_phrase_vector_array,
+                    &mut end_accent_phrase_vector_array,
+                    &mut speaker_id_array,
+                ])
+                .map_err(|_| Error::InferenceFailed)?;
+            Ok(output_tensors[0].as_slice().unwrap().to_owned())
+        })
+        .await
+        .unwrap()
     }
 
-    pub fn decode_session_run(
+    /// # Panics
+    ///
+    /// Panics if `self` does not contain `model_id`, if a mutex is poisoned, or if the blocking inference task panics or is cancelled.
+    pub async fn decode_session_run(
         &self,
         model_id: &VoiceModelId,
-        inputs: Vec<&mut dyn AnyArray>,
+        mut f0_array: NdArray<f32, Ix2>,
+        mut phoneme_array: NdArray<f32, Ix2>,
+        mut speaker_id_array: NdArray<i64, Ix1>,
     ) -> Result<Vec<f32>> {
-        if let Some(model) = self.models.decode.get(model_id) {
-            if let Ok(output_tensors) = model.lock().unwrap().run(inputs) {
-                Ok(output_tensors[0].as_slice().unwrap().to_owned())
-            } else {
-                Err(Error::InferenceFailed)
-            }
-        } else {
-            Err(Error::InvalidModelId {
-                model_id: model_id.clone(),
+        let decode = self
+            .loaded_models
+            .lock()
+            .unwrap()
+            .get(model_id, |SessionSet { decode, .. }| decode);
+
+        tokio::task::spawn_blocking(move || {
+            let mut decode = decode.lock().unwrap();
+
+            let output_tensors = decode
+                .run(vec![
+                    &mut f0_array,
+                    &mut phoneme_array,
+                    &mut speaker_id_array,
+                ])
+                .map_err(|_| Error::InferenceFailed)?;
+            Ok(output_tensors[0].as_slice().unwrap().to_owned())
+        })
+        .await
+        .unwrap()
+    }
+}
+
+/// Holds the `Session`s of the loaded models together with their metadata, and provides operations to add, remove, and look them up.
+///
+/// Every method of this struct should complete near-instantly.
+#[derive(Default)]
+struct LoadedModels(BTreeMap<VoiceModelId, LoadedModel>);
+
+struct LoadedModel {
+    model_inner_ids: BTreeMap<StyleId, ModelInnerId>,
+    metas: VoiceModelMeta,
+    session_set: SessionSet,
+}
+
+impl LoadedModels {
+    fn metas(&self) -> VoiceModelMeta {
+        self.0
+            .values()
+            .flat_map(|LoadedModel { metas, .. }| metas)
+            .cloned()
+            .collect()
+    }
+
+    fn ids_for(&self, style_id: StyleId) -> Result<(VoiceModelId, ModelInnerId)> {
+        let (
+            model_id,
+            LoadedModel {
+                model_inner_ids, ..
+            },
+        ) = self
+            .0
+            .iter()
+            .find(|(_, LoadedModel { metas, .. })| {
+                metas
+                    .iter()
+                    .flat_map(SpeakerMeta::styles)
+                    .any(|style| *style.id() == style_id)
             })
+            .ok_or(Error::InvalidStyleId { style_id })?;
+
+        let model_inner_id = *model_inner_ids
+            .get(&style_id)
+            .expect("`model_inner_ids` should contains all of the style IDs in the model");
+
+        Ok((model_id.clone(), model_inner_id))
+    }
+
+    /// # Panics
+    ///
+    /// Panics if `self` does not contain `model_id` (the map is indexed directly).
+    fn get(
+        &self,
+        model_id: &VoiceModelId,
+        which: fn(&SessionSet) -> &Arc<std::sync::Mutex<AssertSend<Session<'static>>>>,
+    ) -> Arc<std::sync::Mutex<AssertSend<Session<'static>>>> {
+        which(&self.0[model_id].session_set).clone()
+    }
+
+    fn contains_voice_model(&self, model_id: &VoiceModelId) -> bool {
+        self.0.contains_key(model_id)
+    }
+
+    fn contains_style(&self, style_id: StyleId) -> bool {
+        self.styles().any(|style| *style.id() == style_id)
+    }
+
+    /// Checks whether the given `VoiceModel` can be accepted.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ModelAlreadyLoaded` if the model ID of `model` is already loaded, or `StyleAlreadyLoaded` if one of its style IDs is.
+    fn ensure_acceptable(&self, model: &VoiceModel) -> LoadModelResult<()> {
+        let loaded = self.styles();
+        let external = model.metas().iter().flat_map(|speaker| speaker.styles());
+
+        let error = |context| LoadModelError {
+            path: model.path().clone(),
+            context,
+            source: None,
+        };
+
+        if self.0.contains_key(model.id()) {
+            return Err(error(LoadModelErrorKind::ModelAlreadyLoaded {
+                id: model.id().clone(),
+            }));
+        }
+        if let Some((style, _)) =
+            iproduct!(loaded, external).find(|(loaded, external)| loaded.id() == external.id())
+        {
+            return Err(error(LoadModelErrorKind::StyleAlreadyLoaded {
+                id: *style.id(),
+            }));
+        }
+        Ok(())
+    }
+
+    fn insert(
+        &mut self,
+        model: &VoiceModel,
+        predict_duration: Session<'static>,
+        predict_intonation: Session<'static>,
+        decode: Session<'static>,
+    ) -> Result<()> {
+        self.ensure_acceptable(model)?;
+
+        let prev = self.0.insert(
+            model.id().clone(),
+            LoadedModel {
+                model_inner_ids: model.model_inner_ids(),
+                metas: model.metas().clone(),
+                session_set: SessionSet {
+                    predict_duration: Arc::new(std::sync::Mutex::new(predict_duration.into())),
+                    predict_intonation: Arc::new(std::sync::Mutex::new(predict_intonation.into())),
+                    decode: Arc::new(std::sync::Mutex::new(decode.into())),
+                },
+            },
+        );
+        assert!(prev.is_none());
+        Ok(())
+    }
+
+    fn remove(&mut self, model_id: &VoiceModelId) -> Result<()> {
+        if self.0.remove(model_id).is_none() {
+            return Err(Error::UnloadedModel {
+                model_id: model_id.clone(),
+            });
+        }
+        Ok(())
+    }
+
+    fn styles(&self) -> impl Iterator<Item = &StyleMeta> {
+        self.0
+            .values()
+            .flat_map(|LoadedModel { metas, .. }| metas)
+            .flat_map(|speaker| speaker.styles())
+    }
+}
+
+struct SessionSet {
+    predict_duration: Arc<std::sync::Mutex<AssertSend<Session<'static>>>>,
+    predict_intonation: Arc<std::sync::Mutex<AssertSend<Session<'static>>>>,
+    decode: Arc<std::sync::Mutex<AssertSend<Session<'static>>>>,
+}
+
+// FIXME: once the points in the issue comment below have been properly verified, declare `Session` as `Send` on the onnxruntime-rs side.
+// https://github.com/VOICEVOX/voicevox_core/issues/307#issuecomment-1276184614
+
+use self::assert_send::AssertSend;
+
+mod assert_send {
+    use std::ops::{Deref, DerefMut};
+
+    use onnxruntime::session::Session;
+
+    pub(super) struct AssertSend<T>(T);
+
+    impl From<Session<'static>> for AssertSend<Session<'static>> {
+        fn from(session: Session<'static>) -> Self {
+            Self(session)
+        }
+    }
+
+    impl<T> Deref for AssertSend<T> {
+        type Target = T;
+
+        fn deref(&self) -> &Self::Target {
+            &self.0
+        }
+    }
+
+    impl<T> DerefMut for AssertSend<T> {
+        fn deref_mut(&mut self) -> &mut Self::Target {
+            &mut self.0
+        }
     }
+
+    // SAFETY: NOT yet verified — `Session` is only presumed safe to move across threads (see the FIXME above this module); the impl is limited to `Session<'static>`, the only type an `AssertSend` can be built from.
+    #[allow(unsafe_code)]
+    unsafe impl Send for AssertSend<Session<'static>> {}
 }
 
 #[cfg(test)]
@@ -311,27 +475,22 @@ mod tests {
             cpu_num_threads,
             status.heavy_session_options.cpu_num_threads
         );
-        assert!(status.models.predict_duration.is_empty());
-        assert!(status.models.predict_intonation.is_empty());
-        assert!(status.models.decode.is_empty());
-        assert!(status.id_relations.is_empty());
+        assert!(status.loaded_models.lock().unwrap().0.is_empty());
     }
 
     #[rstest]
     #[tokio::test]
     async fn status_load_model_works() {
-        let mut status = Status::new(false, 0);
+        let status = Status::new(false, 0);
         let result = status.load_model(&open_default_vvm_file().await).await;
         assert_debug_fmt_eq!(Ok(()), result);
-        assert_eq!(1, status.models.predict_duration.len());
-        assert_eq!(1, status.models.predict_intonation.len());
-        assert_eq!(1, status.models.decode.len());
+        assert_eq!(1, status.loaded_models.lock().unwrap().0.len());
     }
 
     #[rstest]
     #[tokio::test]
     async fn status_is_model_loaded_works() {
-        let mut status = Status::new(false, 0);
+        let status = Status::new(false, 0);
         let vvm = open_default_vvm_file().await;
         assert!(
             !status.is_loaded_model(vvm.id()),
diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs
index dea396ffb..2eb114b6a 100644
--- a/crates/voicevox_core/src/voice_model.rs
+++ b/crates/voicevox_core/src/voice_model.rs
@@ -1,12 +1,11 @@
-use anyhow::anyhow;
 use async_zip::{read::fs::ZipFileReader, ZipEntry};
 use futures::future::{join3, join_all};
 use serde::{de::DeserializeOwned, Deserialize};
 
 use super::*;
 use std::{
-    collections::HashMap,
-    env,
+    collections::{BTreeMap, HashMap},
+    env, io,
     path::{Path, PathBuf},
 };
 
@@ -16,7 +15,9 @@ use std::{
 pub type RawVoiceModelId = String;
 
 /// 音声モデルID。
-#[derive(PartialEq, Eq, Clone, Ord, PartialOrd, Deserialize, new, Getters, Debug)]
+#[derive(
+    PartialEq, Eq, Clone, Ord, PartialOrd, Deserialize, new, Getters, derive_more::Display, Debug,
+)]
 pub struct VoiceModelId {
     raw_voice_model_id: RawVoiceModelId,
 }
@@ -42,7 +43,7 @@ pub(crate) struct InferenceModels {
 }
 
 impl VoiceModel {
-    pub(crate) async fn read_inference_models(&self) -> Result<InferenceModels> {
+    pub(crate) async fn read_inference_models(&self) -> LoadModelResult<InferenceModels> {
         let reader = VvmEntryReader::open(&self.path).await?;
         let (decode_model_result, predict_duration_model_result, predict_intonation_model_result) =
             join3(
@@ -53,39 +54,18 @@ impl VoiceModel {
             .await;
 
         Ok(InferenceModels {
-            predict_duration_model: predict_duration_model_result.map_err(|e| Error::VvmRead {
-                path: self.path.clone(),
-                source: e,
-            })?,
-            predict_intonation_model: predict_intonation_model_result.map_err(|e| {
-                Error::VvmRead {
-                    path: self.path.clone(),
-                    source: e,
-                }
-            })?,
-            decode_model: decode_model_result.map_err(|e| Error::VvmRead {
-                path: self.path.clone(),
-                source: e,
-            })?,
+            predict_duration_model: predict_duration_model_result?,
+            predict_intonation_model: predict_intonation_model_result?,
+            decode_model: decode_model_result?,
         })
     }
     /// VVMファイルから`VoiceModel`をコンストラクトする。
-    pub async fn from_path(path: impl AsRef<Path>) -> Result<Self> {
-        let reader = VvmEntryReader::open(&path).await?;
-        let manifest = reader
-            .read_vvm_json::<Manifest>("manifest.json")
-            .await
-            .map_err(|e| Error::VvmRead {
-                path: path.as_ref().into(),
-                source: e,
-            })?;
+    pub async fn from_path(path: impl AsRef<Path>) -> LoadModelResult<Self> {
+        let reader = VvmEntryReader::open(path.as_ref()).await?;
+        let manifest = reader.read_vvm_json::<Manifest>("manifest.json").await?;
         let metas = reader
             .read_vvm_json::<VoiceModelMeta>(manifest.metas_filename())
-            .await
-            .map_err(|e| Error::VvmRead {
-                path: path.as_ref().into(),
-                source: e,
-            })?;
+            .await?;
         let id = VoiceModelId::new(nanoid!());
 
         Ok(Self {
@@ -96,6 +76,10 @@ impl VoiceModel {
         })
     }
 
+    // FIXME: `load_all_models`自体を廃止し、これはENGINE専用とする
+    /// # Panics
+    ///
+    /// 目的のディレクトリが読めなかったらパニックする
     pub async fn get_all_models() -> Result<Vec<Self>> {
         let root_dir = if cfg!(test) {
             Path::new(env!("CARGO_WORKSPACE_DIR")).join("model")
@@ -113,26 +97,37 @@ impl VoiceModel {
         let vvm_paths = root_dir
             .read_dir()
             .and_then(|entries| entries.collect::<std::result::Result<Vec<_>, _>>())
-            .map_err(|e| Error::LoadModel {
-                path: root_dir.clone(),
-                source: e.into(),
-            })?
+            .unwrap_or_else(|e| panic!("{}が読めませんでした: {e}", root_dir.display()))
             .into_iter()
             .filter(|entry| entry.path().extension().map_or(false, |ext| ext == "vvm"))
             .map(|entry| Self::from_path(entry.path()));
 
-        join_all(vvm_paths).await.into_iter().collect()
+        join_all(vvm_paths)
+            .await
+            .into_iter()
+            .collect::<std::result::Result<_, _>>()
+            .map_err(Into::into)
     }
     const ROOT_DIR_ENV_NAME: &str = "VV_MODELS_ROOT_DIR";
 
-    /// スタイルIDからモデル内IDを取得する。
-    /// モデル内IDのマッピングが存在しない場合はそのままスタイルIDを返す。
-    pub(crate) fn model_inner_id_for(&self, style_id: StyleId) -> ModelInnerId {
-        self.manifest
-            .style_id_to_model_inner_id()
-            .get(&style_id)
-            .cloned()
-            .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id()))
+    /// Returns the model-inner ID of every style in this model, keyed by style ID.
+    ///
+    /// A style with no entry in the manifest's mapping is given an inner ID equal
+    /// to its own raw style ID.
+    pub(crate) fn model_inner_ids(&self) -> BTreeMap<StyleId, ModelInnerId> {
+        let explicit = self.manifest.style_id_to_model_inner_id();
+        let mut inner_ids = BTreeMap::new();
+        for speaker in self.metas.iter() {
+            for style in speaker.styles() {
+                let style_id = *style.id();
+                let inner_id = match explicit.get(&style_id) {
+                    Some(&mapped) => mapped,
+                    None => ModelInnerId::new(style_id.raw_id()),
+                };
+                inner_ids.insert(style_id, inner_id);
+            }
+        }
+        inner_ids
     }
 }
 
@@ -148,12 +143,13 @@ struct VvmEntryReader {
 }
 
 impl VvmEntryReader {
-    async fn open(path: impl AsRef<Path>) -> Result<Self> {
-        let reader = ZipFileReader::new(path.as_ref())
+    async fn open(path: &Path) -> LoadModelResult<Self> {
+        let reader = ZipFileReader::new(path)
             .await
-            .map_err(|e| Error::OpenFile {
-                path: path.as_ref().into(),
-                source: e.into(),
+            .map_err(|source| LoadModelError {
+                path: path.to_owned(),
+                context: LoadModelErrorKind::OpenZipFile,
+                source: Some(source.into()),
             })?;
         let entry_map: HashMap<_, _> = reader
             .file()
@@ -173,22 +169,38 @@ impl VvmEntryReader {
             .collect();
         Ok(VvmEntryReader::new(reader, entry_map))
     }
-    async fn read_vvm_json<T: DeserializeOwned>(&self, filename: &str) -> anyhow::Result<T> {
+    async fn read_vvm_json<T: DeserializeOwned>(&self, filename: &str) -> LoadModelResult<T> {
         let bytes = self.read_vvm_entry(filename).await?;
-        serde_json::from_slice(&bytes).map_err(|e| e.into())
+        serde_json::from_slice(&bytes).map_err(|source| LoadModelError {
+            path: self.reader.path().to_owned(),
+            context: LoadModelErrorKind::ReadZipEntry {
+                filename: filename.to_owned(),
+            },
+            source: Some(source.into()),
+        })
     }
 
-    async fn read_vvm_entry(&self, filename: &str) -> anyhow::Result<Vec<u8>> {
-        let me = self
-            .entry_map
-            .get(filename)
-            .ok_or_else(|| anyhow!("Not found in vvm entries: {}", filename))?;
-        let mut manifest_reader = self.reader.entry(me.index).await?;
-        let mut buf = Vec::with_capacity(me.entry.uncompressed_size() as usize);
-        manifest_reader
-            .read_to_end_checked(&mut buf, &me.entry)
-            .await?;
-        Ok(buf)
+    /// Reads all of entry `filename`; every failure is wrapped as a `ReadZipEntry` error.
+    async fn read_vvm_entry(&self, filename: &str) -> LoadModelResult<Vec<u8>> {
+        let read = async {
+            let me = self
+                .entry_map
+                .get(filename)
+                .ok_or_else(|| io::Error::from(io::ErrorKind::NotFound))?;
+            let mut zip_entry_reader = self.reader.entry(me.index).await?;
+            let mut buf = Vec::with_capacity(me.entry.uncompressed_size() as usize);
+            zip_entry_reader
+                .read_to_end_checked(&mut buf, &me.entry)
+                .await?;
+            Ok::<_, anyhow::Error>(buf)
+        };
+        read.await.map_err(|source| LoadModelError {
+            path: self.reader.path().to_owned(),
+            context: LoadModelErrorKind::ReadZipEntry {
+                filename: filename.to_owned(),
+            },
+            source: Some(source),
+        })
     }
 }
 
diff --git a/crates/voicevox_core/src/voice_synthesizer.rs b/crates/voicevox_core/src/voice_synthesizer.rs
index 41c0406da..a798d8e17 100644
--- a/crates/voicevox_core/src/voice_synthesizer.rs
+++ b/crates/voicevox_core/src/voice_synthesizer.rs
@@ -173,18 +173,18 @@ impl Synthesizer {
     }
 
     /// 音声モデルを読み込む。
-    pub async fn load_voice_model(&mut self, model: &VoiceModel) -> Result<()> {
+    pub async fn load_voice_model(&self, model: &VoiceModel) -> Result<()> {
         self.synthesis_engine
-            .inference_core_mut()
+            .inference_core()
             .load_model(model)
             .await?;
         Ok(())
     }
 
     /// 音声モデルの読み込みを解除する。
-    pub fn unload_voice_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
+    pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
         self.synthesis_engine
-            .inference_core_mut()
+            .inference_core()
             .unload_model(voice_model_id)
     }
 
@@ -203,7 +203,7 @@ impl Synthesizer {
     }
 
     /// 今読み込んでいる音声モデルのメタ情報を返す。
-    pub fn metas(&self) -> &VoiceModelMeta {
+    pub fn metas(&self) -> VoiceModelMeta {
         self.synthesis_engine.inference_core().metas()
     }
 
@@ -616,7 +616,7 @@ mod tests {
     #[case(Ok(()))]
     #[tokio::test]
     async fn load_model_works(#[case] expected_result_at_initialized: Result<()>) {
-        let mut syntesizer = Synthesizer::new_with_initialize(
+        let syntesizer = Synthesizer::new_with_initialize(
             Arc::new(OpenJtalk::new_without_dic()),
             &InitializeOptions {
                 acceleration_mode: AccelerationMode::Cpu,
@@ -657,7 +657,7 @@ mod tests {
     #[tokio::test]
     async fn is_loaded_model_by_style_id_works(#[case] style_id: u32, #[case] expected: bool) {
         let style_id = StyleId::new(style_id);
-        let mut syntesizer = Synthesizer::new_with_initialize(
+        let syntesizer = Synthesizer::new_with_initialize(
             Arc::new(OpenJtalk::new_without_dic()),
             &InitializeOptions {
                 acceleration_mode: AccelerationMode::Cpu,
@@ -686,7 +686,7 @@ mod tests {
     #[rstest]
     #[tokio::test]
     async fn predict_duration_works() {
-        let mut syntesizer = Synthesizer::new_with_initialize(
+        let syntesizer = Synthesizer::new_with_initialize(
             Arc::new(OpenJtalk::new_without_dic()),
             &InitializeOptions {
                 acceleration_mode: AccelerationMode::Cpu,
@@ -718,7 +718,7 @@ mod tests {
     #[rstest]
     #[tokio::test]
     async fn predict_intonation_works() {
-        let mut syntesizer = Synthesizer::new_with_initialize(
+        let syntesizer = Synthesizer::new_with_initialize(
             Arc::new(OpenJtalk::new_without_dic()),
             &InitializeOptions {
                 acceleration_mode: AccelerationMode::Cpu,
@@ -760,7 +760,7 @@ mod tests {
     #[rstest]
     #[tokio::test]
     async fn decode_works() {
-        let mut syntesizer = Synthesizer::new_with_initialize(
+        let syntesizer = Synthesizer::new_with_initialize(
             Arc::new(OpenJtalk::new_without_dic()),
             &InitializeOptions {
                 acceleration_mode: AccelerationMode::Cpu,
diff --git a/crates/voicevox_core_c_api/include/voicevox_core.h b/crates/voicevox_core_c_api/include/voicevox_core.h
index d5cf4f6b2..8b012e901 100644
--- a/crates/voicevox_core_c_api/include/voicevox_core.h
+++ b/crates/voicevox_core_c_api/include/voicevox_core.h
@@ -94,10 +94,6 @@ enum VoicevoxResultCode
    * open_jtalk辞書ファイルが読み込まれていない
    */
   VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR = 1,
-  /**
-   * modelの読み込みに失敗した
-   */
-  VOICEVOX_RESULT_LOAD_MODEL_ERROR = 2,
   /**
    * サポートされているデバイス情報取得に失敗した
    */
@@ -106,10 +102,6 @@ enum VoicevoxResultCode
    * GPUモードがサポートされていない
    */
   VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4,
-  /**
-   * メタ情報読み込みに失敗した
-   */
-  VOICEVOX_RESULT_LOAD_METAS_ERROR = 5,
   /**
    * 無効なstyle_idが指定された
    */
@@ -143,17 +135,25 @@ enum VoicevoxResultCode
    */
   VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR = 15,
   /**
-   * ファイルオープンエラー
+   * ZIPファイルを開くことに失敗した
+   */
+  VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR = 16,
+  /**
+   * ZIP内のファイルが読めなかった
+   */
+  VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR = 17,
+  /**
+   * すでに読み込まれている音声モデルを読み込もうとした
    */
-  VOICEVOX_RESULT_OPEN_FILE_ERROR = 16,
+  VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR = 18,
   /**
-   * Modelを読み込めなかった
+   * すでに読み込まれているスタイルを読み込もうとした
    */
-  VOICEVOX_RESULT_VVM_MODEL_READ_ERROR = 17,
+  VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR = 26,
   /**
-   * すでに読み込まれているModelを読み込もうとした
+   * 無効なモデルデータ
    */
-  VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR = 18,
+  VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR = 27,
   /**
    * Modelが読み込まれていない
    */
@@ -569,7 +569,7 @@ void voicevox_synthesizer_delete(struct VoicevoxSynthesizer *synthesizer);
 #ifdef _WIN32
 __declspec(dllimport)
 #endif
-VoicevoxResultCode voicevox_synthesizer_load_voice_model(struct VoicevoxSynthesizer *synthesizer,
+VoicevoxResultCode voicevox_synthesizer_load_voice_model(const struct VoicevoxSynthesizer *synthesizer,
                                                          const struct VoicevoxVoiceModel *model);
 
 /**
@@ -588,7 +588,7 @@ VoicevoxResultCode voicevox_synthesizer_load_voice_model(struct VoicevoxSynthesi
 #ifdef _WIN32
 __declspec(dllimport)
 #endif
-VoicevoxResultCode voicevox_synthesizer_unload_voice_model(struct VoicevoxSynthesizer *synthesizer,
+VoicevoxResultCode voicevox_synthesizer_unload_voice_model(const struct VoicevoxSynthesizer *synthesizer,
                                                            VoicevoxVoiceModelId model_id);
 
 /**
@@ -629,19 +629,20 @@ bool voicevox_synthesizer_is_loaded_voice_model(const struct VoicevoxSynthesizer
 /**
  * 今読み込んでいる音声モデルのメタ情報を、JSONで取得する。
  *
+ * JSONの解放は ::voicevox_json_free で行う。
+ *
  * @param [in] synthesizer 音声シンセサイザ
  *
  * @return メタ情報のJSON文字列
  *
  * \safety{
  * - `synthesizer`は ::voicevox_synthesizer_new_with_initialize で得たものでなければならず、また ::voicevox_synthesizer_delete で解放されていてはいけない。
- * - 戻り値の文字列の<b>生存期間</b>(_lifetime_)は次にこの関数が呼ばれるか、`synthesizer`が破棄されるまでである。この生存期間を越えて文字列にアクセスしてはならない。
  * }
  */
 #ifdef _WIN32
 __declspec(dllimport)
 #endif
-const char *voicevox_synthesizer_get_metas_json(const struct VoicevoxSynthesizer *synthesizer);
+char *voicevox_synthesizer_create_metas_json(const struct VoicevoxSynthesizer *synthesizer);
 
 /**
  * このライブラリで利用可能なデバイスの情報を、JSONで取得する。
@@ -951,6 +952,7 @@ VoicevoxResultCode voicevox_synthesizer_tts(const struct VoicevoxSynthesizer *sy
  * \safety{
  * - `json`は以下のAPIで得られたポインタでなくてはいけない。
  *     - ::voicevox_create_supported_devices_json
+ *     - ::voicevox_synthesizer_create_metas_json
  *     - ::voicevox_synthesizer_create_audio_query
  *     - ::voicevox_synthesizer_create_accent_phrases
  *     - ::voicevox_synthesizer_replace_mora_data
diff --git a/crates/voicevox_core_c_api/src/c_impls.rs b/crates/voicevox_core_c_api/src/c_impls.rs
index f90db2337..a891593a4 100644
--- a/crates/voicevox_core_c_api/src/c_impls.rs
+++ b/crates/voicevox_core_c_api/src/c_impls.rs
@@ -1,12 +1,8 @@
-use std::{
-    ffi::{CStr, CString},
-    path::Path,
-    sync::Arc,
-};
+use std::{ffi::CString, path::Path, sync::Arc};
 
 use voicevox_core::{InitializeOptions, OpenJtalk, Result, Synthesizer, VoiceModel, VoiceModelId};
 
-use crate::{OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel};
+use crate::{CApiResult, OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel};
 
 impl OpenJtalkRc {
     pub(crate) fn new_with_initialize(open_jtalk_dic_dir: impl AsRef<Path>) -> Result<Self> {
@@ -23,30 +19,22 @@ impl VoicevoxSynthesizer {
     ) -> Result<Self> {
         let synthesizer =
             Synthesizer::new_with_initialize(open_jtalk.open_jtalk.clone(), options).await?;
-        let metas = synthesizer.metas();
-        let metas_cstring = CString::new(serde_json::to_string(&metas).unwrap()).unwrap();
-        Ok(Self {
-            synthesizer,
-            metas_cstring,
-        })
+        Ok(Self { synthesizer })
     }
 
-    pub(crate) async fn load_voice_model(&mut self, model: &VoiceModel) -> Result<()> {
+    pub(crate) async fn load_voice_model(&self, model: &VoiceModel) -> CApiResult<()> {
         self.synthesizer.load_voice_model(model).await?;
-        let metas = self.synthesizer.metas();
-        self.metas_cstring = CString::new(serde_json::to_string(metas).unwrap()).unwrap();
         Ok(())
     }
 
-    pub(crate) fn unload_voice_model(&mut self, model_id: &VoiceModelId) -> Result<()> {
+    pub(crate) fn unload_voice_model(&self, model_id: &VoiceModelId) -> Result<()> {
         self.synthesizer.unload_voice_model(model_id)?;
-        let metas = self.synthesizer.metas();
-        self.metas_cstring = CString::new(serde_json::to_string(metas).unwrap()).unwrap();
         Ok(())
     }
 
-    pub(crate) fn metas(&self) -> &CStr {
-        &self.metas_cstring
+    pub(crate) fn metas(&self) -> CString {
+        let metas = &self.synthesizer.metas();
+        CString::new(serde_json::to_string(metas).unwrap()).unwrap()
     }
 }
 
diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs
index 72d43497e..0e9cfd279 100644
--- a/crates/voicevox_core_c_api/src/helpers.rs
+++ b/crates/voicevox_core_c_api/src/helpers.rs
@@ -18,15 +18,20 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes
     }
 
     fn into_result_code(result: CApiResult<()>) -> VoicevoxResultCode {
-        use voicevox_core::{result_code::VoicevoxResultCode::*, Error::*};
+        use voicevox_core::{result_code::VoicevoxResultCode::*, Error::*, LoadModelErrorKind::*};
         use CApiError::*;
 
         match result {
             Ok(()) => VOICEVOX_RESULT_OK,
             Err(RustApi(NotLoadedOpenjtalkDict)) => VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR,
             Err(RustApi(GpuSupport)) => VOICEVOX_RESULT_GPU_SUPPORT_ERROR,
-            Err(RustApi(LoadModel { .. })) => VOICEVOX_RESULT_LOAD_MODEL_ERROR,
-            Err(RustApi(LoadMetas(_))) => VOICEVOX_RESULT_LOAD_METAS_ERROR,
+            Err(RustApi(LoadModel(err))) => match err.context() {
+                OpenZipFile => VOICEVOX_RESULT_OPEN_ZIP_FILE_ERROR,
+                ReadZipEntry { .. } => VOICEVOX_RESULT_READ_ZIP_ENTRY_ERROR,
+                ModelAlreadyLoaded { .. } => VOICEVOX_RESULT_MODEL_ALREADY_LOADED_ERROR,
+                StyleAlreadyLoaded { .. } => VOICEVOX_RESULT_STYLE_ALREADY_LOADED_ERROR,
+                InvalidModelData => VOICEVOX_RESULT_INVALID_MODEL_DATA_ERROR,
+            },
             Err(RustApi(GetSupportedDevices(_))) => VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR,
             Err(RustApi(InvalidStyleId { .. })) => VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR,
             Err(RustApi(InvalidModelId { .. })) => VOICEVOX_RESULT_INVALID_MODEL_ID_ERROR,
@@ -35,9 +40,6 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes
                 VOICEVOX_RESULT_EXTRACT_FULL_CONTEXT_LABEL_ERROR
             }
             Err(RustApi(UnloadedModel { .. })) => VOICEVOX_RESULT_UNLOADED_MODEL_ERROR,
-            Err(RustApi(AlreadyLoadedModel { .. })) => VOICEVOX_RESULT_ALREADY_LOADED_MODEL_ERROR,
-            Err(RustApi(OpenFile { .. })) => VOICEVOX_RESULT_OPEN_FILE_ERROR,
-            Err(RustApi(VvmRead { .. })) => VOICEVOX_RESULT_VVM_MODEL_READ_ERROR,
             Err(RustApi(ParseKana(_))) => VOICEVOX_RESULT_PARSE_KANA_ERROR,
             Err(RustApi(LoadUserDict(_))) => VOICEVOX_RESULT_LOAD_USER_DICT_ERROR,
             Err(RustApi(SaveUserDict(_))) => VOICEVOX_RESULT_SAVE_USER_DICT_ERROR,
@@ -52,10 +54,10 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes
     }
 }
 
-type CApiResult<T> = std::result::Result<T, CApiError>;
+pub(crate) type CApiResult<T> = std::result::Result<T, CApiError>;
 
 #[derive(Error, Debug)]
-pub(crate) enum CApiError {
+pub enum CApiError {
     #[error("{0}")]
     RustApi(#[from] voicevox_core::Error),
     #[error("UTF-8として不正な入力です")]
diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs
index db06de484..e97b0e5bb 100644
--- a/crates/voicevox_core_c_api/src/lib.rs
+++ b/crates/voicevox_core_c_api/src/lib.rs
@@ -316,7 +316,6 @@ pub extern "C" fn voicevox_voice_model_delete(model: Box<VoicevoxVoiceModel>) {
 #[derive(Getters)]
 pub struct VoicevoxSynthesizer {
     synthesizer: Synthesizer,
-    metas_cstring: CString,
 }
 
 /// ::VoicevoxSynthesizer を<b>構築</b>(_construct_)する。
@@ -376,14 +375,10 @@ pub extern "C" fn voicevox_synthesizer_delete(synthesizer: Box<VoicevoxSynthesiz
 /// }
 #[no_mangle]
 pub extern "C" fn voicevox_synthesizer_load_voice_model(
-    synthesizer: &mut VoicevoxSynthesizer,
+    synthesizer: &VoicevoxSynthesizer,
     model: &VoicevoxVoiceModel,
 ) -> VoicevoxResultCode {
-    into_result_code_with_error(
-        RUNTIME
-            .block_on(synthesizer.load_voice_model(model.model()))
-            .map_err(Into::into),
-    )
+    into_result_code_with_error(RUNTIME.block_on(synthesizer.load_voice_model(model.model())))
 }
 
 /// 音声モデルの読み込みを解除する。
@@ -399,7 +394,7 @@ pub extern "C" fn voicevox_synthesizer_load_voice_model(
 /// }
 #[no_mangle]
 pub unsafe extern "C" fn voicevox_synthesizer_unload_voice_model(
-    synthesizer: &mut VoicevoxSynthesizer,
+    synthesizer: &VoicevoxSynthesizer,
     model_id: VoicevoxVoiceModelId,
 ) -> VoicevoxResultCode {
     into_result_code_with_error((|| {
@@ -448,19 +443,21 @@ pub unsafe extern "C" fn voicevox_synthesizer_is_loaded_voice_model(
 
 /// 今読み込んでいる音声モデルのメタ情報を、JSONで取得する。
 ///
+/// JSONの解放は ::voicevox_json_free で行う。
+///
 /// @param [in] synthesizer 音声シンセサイザ
 ///
 /// @return メタ情報のJSON文字列
 ///
 /// \safety{
 /// - `synthesizer`は ::voicevox_synthesizer_new_with_initialize で得たものでなければならず、また ::voicevox_synthesizer_delete で解放されていてはいけない。
-/// - 戻り値の文字列の<b>生存期間</b>(_lifetime_)は次にこの関数が呼ばれるか、`synthesizer`が破棄されるまでである。この生存期間を越えて文字列にアクセスしてはならない。
 /// }
 #[no_mangle]
-pub extern "C" fn voicevox_synthesizer_get_metas_json(
+pub extern "C" fn voicevox_synthesizer_create_metas_json(
     synthesizer: &VoicevoxSynthesizer,
-) -> *const c_char {
-    synthesizer.metas().as_ptr()
+) -> *mut c_char {
+    let metas = synthesizer.metas();
+    C_STRING_DROP_CHECKER.whitelist(metas).into_raw()
 }
 
 /// このライブラリで利用可能なデバイスの情報を、JSONで取得する。
@@ -895,6 +892,7 @@ pub unsafe extern "C" fn voicevox_synthesizer_tts(
 /// \safety{
 /// - `json`は以下のAPIで得られたポインタでなくてはいけない。
 ///     - ::voicevox_create_supported_devices_json
+///     - ::voicevox_synthesizer_create_metas_json
 ///     - ::voicevox_synthesizer_create_audio_query
 ///     - ::voicevox_synthesizer_create_accent_phrases
 ///     - ::voicevox_synthesizer_replace_mora_data
@@ -1238,13 +1236,6 @@ mod tests {
         Err(Error::NotLoadedOpenjtalkDict),
         VoicevoxResultCode::VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR
     )]
-    #[case(
-        Err(Error::LoadModel {
-            path: "path/to/model.onnx".into(),
-            source: anyhow!("some load model error"),
-        }),
-        VoicevoxResultCode::VOICEVOX_RESULT_LOAD_MODEL_ERROR
-    )]
     #[case(
         Err(Error::GetSupportedDevices(anyhow!("some get supported devices error"))),
         VoicevoxResultCode::VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR
diff --git a/crates/voicevox_core_c_api/tests/e2e/symbols.rs b/crates/voicevox_core_c_api/tests/e2e/symbols.rs
index 47d15ec1d..32634bbbf 100644
--- a/crates/voicevox_core_c_api/tests/e2e/symbols.rs
+++ b/crates/voicevox_core_c_api/tests/e2e/symbols.rs
@@ -55,8 +55,8 @@ pub(crate) struct Symbols<'lib> {
         'lib,
         unsafe extern "C" fn(*const VoicevoxSynthesizer, VoicevoxVoiceModelId) -> bool,
     >,
-    pub(crate) voicevox_synthesizer_get_metas_json:
-        Symbol<'lib, unsafe extern "C" fn(*const VoicevoxSynthesizer) -> *const c_char>,
+    pub(crate) voicevox_synthesizer_create_metas_json:
+        Symbol<'lib, unsafe extern "C" fn(*const VoicevoxSynthesizer) -> *mut c_char>,
     pub(crate) voicevox_create_supported_devices_json:
         Symbol<'lib, unsafe extern "C" fn(*mut *mut c_char) -> VoicevoxResultCode>,
     pub(crate) voicevox_make_default_audio_query_options:
@@ -203,7 +203,7 @@ impl<'lib> Symbols<'lib> {
             voicevox_synthesizer_unload_voice_model,
             voicevox_synthesizer_is_gpu_mode,
             voicevox_synthesizer_is_loaded_voice_model,
-            voicevox_synthesizer_get_metas_json,
+            voicevox_synthesizer_create_metas_json,
             voicevox_create_supported_devices_json,
             voicevox_make_default_audio_query_options,
             voicevox_synthesizer_create_audio_query,
diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_with_initialize_output_json.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_with_initialize_output_json.rs
index 502880a14..41a7d853c 100644
--- a/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_with_initialize_output_json.rs
+++ b/crates/voicevox_core_c_api/tests/e2e/testcases/synthesizer_new_with_initialize_output_json.rs
@@ -31,7 +31,8 @@ impl assert_cdylib::TestCase for TestCase {
             voicevox_open_jtalk_rc_delete,
             voicevox_synthesizer_new_with_initialize,
             voicevox_synthesizer_delete,
-            voicevox_synthesizer_get_metas_json,
+            voicevox_synthesizer_create_metas_json,
+            voicevox_json_free,
             ..
         } = Symbols::new(lib)?;
 
@@ -60,9 +61,11 @@ impl assert_cdylib::TestCase for TestCase {
         };
 
         let metas_json = {
-            let metas_json =
-                CStr::from_ptr(voicevox_synthesizer_get_metas_json(synthesizer)).to_str()?;
-            serde_json::to_string_pretty(&metas_json.parse::<serde_json::Value>()?).unwrap()
+            let raw = voicevox_synthesizer_create_metas_json(synthesizer);
+            let metas_json = &CStr::from_ptr(raw).to_str()?.parse::<serde_json::Value>()?;
+            let metas_json = serde_json::to_string_pretty(metas_json).unwrap();
+            voicevox_json_free(raw);
+            metas_json
         };
 
         std::assert_eq!(SNAPSHOTS.metas, metas_json);
diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs
index 40b9e31bd..3c76dd8a0 100644
--- a/crates/voicevox_core_python_api/src/lib.rs
+++ b/crates/voicevox_core_python_api/src/lib.rs
@@ -178,7 +178,7 @@ impl Synthesizer {
     #[getter]
     fn metas<'py>(&self, py: Python<'py>) -> PyResult<Vec<&'py PyAny>> {
         let synthesizer = self.synthesizer.get()?;
-        to_pydantic_voice_model_meta(RUNTIME.block_on(synthesizer.lock()).metas(), py)
+        to_pydantic_voice_model_meta(&RUNTIME.block_on(synthesizer.lock()).metas(), py)
     }
 
     fn load_voice_model<'py>(