Merge branch 'main' into change-liberate-voicevox-core

VOICEVOX · Oct 29, 2024 · f9e3cde · f9e3cde
2 parents 5187f9e + 5641e37
commit f9e3cde
Show file tree

Hide file tree

Showing 27 changed files with 373 additions and 137 deletions.
diff --git a/.github/workflows/generate_document.yml b/.github/workflows/generate_document.yml
@@ -51,22 +51,22 @@ jobs:
           poetry install --with test
       - name: mkdir public
         run: mkdir -p public/apis/c_api
-      - name: cp docs/apis/index.html
-        run: cp docs/apis/index.html public/apis/
+      - name: cp docs/ghpages/apis/index.html
+        run: cp docs/ghpages/apis/index.html public/apis/
       - name: Generate rustdoc
         run: |
           cargo +${{ steps.docsrs-rust-version.outputs.rust-toolchain }} docs-rs -p voicevox_core
           mv target/x86_64-unknown-linux-gnu/doc public/apis/rust_api
       - name: cp crates/voicevox_core_c_api/include/voicevox_core.h
-        run: cp crates/voicevox_core_c_api/include/voicevox_core.h docs/apis/c_api/doxygen/
+        run: cp crates/voicevox_core_c_api/include/voicevox_core.h docs/ghpages/apis/c_api/doxygen/
       - name: Generate doxygen document
         uses: mattnotmitt/doxygen-action@v1.9.4
         with:
-          working-directory: "docs/apis/c_api/doxygen"
+          working-directory: "docs/ghpages/apis/c_api/doxygen"
       - name: Build voicevox_core_python_api
         run: maturin develop --manifest-path ./crates/voicevox_core_python_api/Cargo.toml --locked
       - name: Generate Sphinx document
-        run: sphinx-build docs/apis/python_api public/apis/python_api
+        run: sphinx-build docs/ghpages/apis/python_api public/apis/python_api
       - name: Generate Javadoc
         run: |
           (cd crates/voicevox_core_java_api && ./gradlew javadoc)

diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,3 @@
-[submodule "docs/apis/c_api/doxygen/doxygen-awesome-css"]
-	path = docs/apis/c_api/doxygen/doxygen-awesome-css
+[submodule "docs/ghpages/apis/c_api/doxygen/doxygen-awesome-css"]
+	path = docs/ghpages/apis/c_api/doxygen/doxygen-awesome-css
 	url = https://github.com/jothepro/doxygen-awesome-css.git
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@
 
 ## ユーザーガイド
 
-[VOICEVOX コア ユーザーガイド](./docs/usage.md)をご覧ください。
+[VOICEVOX コア ユーザーガイド](./docs/guide/user/usage.md)をご覧ください。
 
 ## 環境構築
 
@@ -57,7 +57,7 @@ chmod +x download
 ./download
 ```
 
-詳細な Downloader の使い方については [こちら](./docs/downloader.md) を参照してください
+詳細な Downloader の使い方については [こちら](./docs/guide/user/downloader.md) を参照してください
 
 <details>
 <summary> Downloader を使わない場合</summary>
@@ -88,13 +88,13 @@ Raspberry Pi 用の ONNX Runtime は以下からダウンロードできます
 nvidia 製 GPU を搭載した Windows, Linux PC では CUDA を用いた合成が可能です。
 
 CUDA 版を利用するには Downloader の実行が必要です。  
-詳細は [CUDA 版をダウンロードする場合](./docs/downloader.md#cuda) を参照してください
+詳細は [CUDA 版をダウンロードする場合](./docs/guide/user/downloader.md#cuda) を参照してください
 
 ##### DirectML
 
 DirectX12 に対応した GPU を搭載した Windows PC では DirectML を用いた合成が可能です  
 DirectML 版を利用するには Downloader の実行が必要です。  
-詳細は [DirectML 版をダウンロードする場合](./docs/downloads/downloader.md#directml) を参照してください
+詳細は [DirectML 版をダウンロードする場合](./docs/guide/user/downloader.md#directml) を参照してください
 
 macOS の場合、CUDA の macOS サポートは現在終了しているため、VOICEVOX CORE の macOS 向けコアライブラリも CUDA, CUDNN を利用しない CPU 版のみの提供となります。
 
@@ -154,7 +154,7 @@ cargo build --release -p voicevox_core_c_api --features load-onnxruntime
 ```
 
 DLL 用のヘッダファイルの雛形は [crates/voicevox_core_c_api/include/voicevox_core.h](https://github.com/VOICEVOX/voicevox_core/tree/main/crates/voicevox_core_c_api/include/voicevox_core.h) にあります。
-詳しくは[feature-options.md](./docs/feature-options.md)を参照してください。
+詳しくは[feature-options.md](./docs/guide/user/feature-options.md)を参照してください。
 
 ```bash
 # ヘッダファイルを加工し、マクロ`VOICEVOX_LOAD_ONNXRUNTIME`を宣言

diff --git a/crates/voicevox_core/src/blocking.rs b/crates/voicevox_core/src/blocking.rs
@@ -2,8 +2,8 @@
 
 pub use crate::{
     engine::open_jtalk::blocking::OpenJtalk, infer::runtimes::onnxruntime::blocking::Onnxruntime,
-    synthesizer::blocking::Synthesizer, user_dict::dict::blocking::UserDict,
-    voice_model::blocking::VoiceModelFile,
+    synthesizer::blocking::AudioFeature, synthesizer::blocking::Synthesizer,
+    user_dict::dict::blocking::UserDict, voice_model::blocking::VoiceModelFile,
 };
 
 pub mod onnxruntime {

diff --git a/crates/voicevox_core/src/engine/audio_file.rs b/crates/voicevox_core/src/engine/audio_file.rs
@@ -0,0 +1,32 @@
+use std::io::{Cursor, Write as _};
+
+/// 16bit PCMにヘッダを付加しWAVフォーマットのバイナリを生成する。
+pub fn wav_from_s16le(pcm: &[u8], sampling_rate: u32, is_stereo: bool) -> Vec<u8> {
+    let num_channels: u16 = if is_stereo { 2 } else { 1 };
+    let bit_depth: u16 = 16;
+    let block_size: u16 = bit_depth * num_channels / 8;
+
+    let bytes_size = pcm.len() as u32;
+    let wave_size = bytes_size + 44;
+
+    let buf: Vec<u8> = Vec::with_capacity(wave_size as usize);
+    let mut cur = Cursor::new(buf);
+
+    cur.write_all("RIFF".as_bytes()).unwrap();
+    cur.write_all(&(wave_size - 8).to_le_bytes()).unwrap();
+    cur.write_all("WAVEfmt ".as_bytes()).unwrap();
+    cur.write_all(&16_u32.to_le_bytes()).unwrap(); // fmt header length
+    cur.write_all(&1_u16.to_le_bytes()).unwrap(); // linear PCM
+    cur.write_all(&num_channels.to_le_bytes()).unwrap();
+    cur.write_all(&sampling_rate.to_le_bytes()).unwrap();
+
+    let block_rate = sampling_rate * block_size as u32;
+
+    cur.write_all(&block_rate.to_le_bytes()).unwrap();
+    cur.write_all(&block_size.to_le_bytes()).unwrap();
+    cur.write_all(&bit_depth.to_le_bytes()).unwrap();
+    cur.write_all("data".as_bytes()).unwrap();
+    cur.write_all(&bytes_size.to_le_bytes()).unwrap();
+    cur.write_all(pcm).unwrap();
+    cur.into_inner()
+}
diff --git a/crates/voicevox_core/src/engine/mod.rs b/crates/voicevox_core/src/engine/mod.rs
@@ -1,11 +1,13 @@
 mod acoustic_feature_extractor;
+mod audio_file;
 mod full_context_label;
 mod kana_parser;
 mod model;
 mod mora_list;
 pub(crate) mod open_jtalk;
 
 pub(crate) use self::acoustic_feature_extractor::OjtPhoneme;
+pub use self::audio_file::wav_from_s16le;
 pub(crate) use self::full_context_label::{
     extract_full_context_label, mora_to_text, FullContextLabelError,
 };

diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs
@@ -83,7 +83,7 @@ use rstest_reuse;
 
 pub use self::{
     devices::SupportedDevices,
-    engine::{AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
+    engine::{wav_from_s16le, AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
     error::{Error, ErrorKind},
     metas::{
         RawStyleId, RawStyleVersion, SpeakerMeta, StyleId, StyleMeta, StyleType, StyleVersion,