diff --git a/Cargo.lock b/Cargo.lock index a9ba449bd..56c0a14ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,17 +83,6 @@ dependencies = [ "opaque-debug", ] -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom 0.2.7", - "once_cell", - "version_check", -] - [[package]] name = "aho-corasick" version = "0.7.19" @@ -147,7 +136,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3203e79f4dd9bdda415ed03cf14dae5a2bf775c683a00f94e9cd1faf0f596e5" dependencies = [ "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -161,6 +150,23 @@ dependencies = [ "futures-core", ] +[[package]] +name = "async-compression" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + [[package]] name = "async-executor" version = "1.4.1" @@ -260,7 +266,22 @@ checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", +] + +[[package]] +name = "async_zip" +version = "0.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c50d29ab7e2f9e808cca1a69ea56a36f4ff216f54a41a23aae1fd4afc05cc020" +dependencies = [ + "async-compression", + "chrono", + "crc32fast", + "log", + "pin-project", + "thiserror", + "tokio", ] [[package]] @@ -474,7 +495,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn", + "syn 1.0.102", "tempfile", "toml 0.5.9", ] @@ -591,7 +612,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -826,7 +847,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdffe87e1d521a10f9696f833fe502293ea446d7f256c06128293a4119bdf4cb" dependencies = [ "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -893,7 +914,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 1.0.102", ] [[package]] @@ -910,7 +931,7 @@ checksum = "39e61fda7e62115119469c7b3591fd913ecca96fb766cfd3f2e2502ab7bc87a5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -921,7 +942,7 @@ checksum = "0c5905670fd9c320154f3a4a01c9e609733cd7b753f3c58777ab7d5ce26686b3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -932,7 +953,7 @@ checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -1179,9 +1200,9 @@ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" [[package]] name = "futures" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -1194,9 +1215,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" +checksum = 
"955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1204,15 +1225,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -1221,9 +1242,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-lite" @@ -1242,26 +1263,26 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "futures-sink" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-timer" @@ -1271,9 +1292,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.25" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1337,7 +1358,7 @@ checksum = "41973d4c45f7a35af8753ba3457cc99d406d863941fd7f52663cff54a5ab99b3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -1779,9 +1800,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.134" +version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb" +checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" [[package]] name = "libloading" @@ -1867,6 +1888,17 @@ dependencies = [ "value-bag", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + 
"pkg-config", +] + [[package]] name = "matchers" version = "0.1.0" @@ -1899,18 +1931,18 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" -version = "0.6.5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" dependencies = [ "autocfg", ] [[package]] name = "memoffset" -version = "0.7.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" dependencies = [ "autocfg", ] @@ -1967,6 +1999,15 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "nanoid" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "ndarray" version = "0.15.6" @@ -2097,21 +2138,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" -[[package]] -name = "numpy" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a462c1af5ba1fddec1488c4646993a23ae7931f9e170ccba23e9c7c834277797" -dependencies = [ - "ahash", - "libc", - "ndarray", - "num-complex", - "num-integer", - "num-traits", - "pyo3", -] - [[package]] name = "object" version = "0.29.0" @@ -2349,7 +2375,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -2463,7 +2489,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.102", "version_check", ] @@ -2486,9 +2512,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.46" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -2504,14 +2530,14 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.17.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201b6887e5576bf2f945fe65172c1fcbf3fcf285b23e4d71eb171d9736e38d32" +checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" dependencies = [ "cfg-if", "indoc", "libc", - "memoffset 0.6.5", + "memoffset 0.8.0", "parking_lot", "pyo3-build-config", "pyo3-ffi", @@ -2519,11 +2545,24 @@ dependencies = [ "unindent", ] +[[package]] +name = "pyo3-asyncio" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3564762e37035cfc486228e10b0528460fa026d681b5763873c693aa0d5c260" +dependencies = [ + "futures", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", +] + [[package]] name = "pyo3-build-config" -version = "0.17.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0708c9ed01692635cbf056e286008e5a2927ab1a5e48cdd3aeb1ba5a6fef47" +checksum = 
"9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" dependencies = [ "once_cell", "target-lexicon", @@ -2531,9 +2570,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.17.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90352dea4f486932b72ddf776264d293f85b79a1d214de1d023927b41461132d" +checksum = "fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" dependencies = [ "libc", "pyo3-build-config", @@ -2541,9 +2580,9 @@ dependencies = [ [[package]] name = "pyo3-log" -version = "0.7.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5695ccff5060c13ca1751cf8c857a12da9b0bf0378cb071c5e0326f7c7e4c1b" +checksum = "f9c8b57fe71fb5dcf38970ebedc2b1531cf1c14b1b9b4c560a182a57e115575c" dependencies = [ "arc-swap", "log", @@ -2552,32 +2591,32 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.17.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb24b804a2d9e88bfcc480a5a6dd76f006c1e3edaf064e8250423336e2cd79d" +checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 1.0.102", ] [[package]] name = "pyo3-macros-backend" -version = "0.17.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f22bb49f6a7348c253d7ac67a6875f2dc65f36c2ae64a82c381d528972bea6d6" +checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -2793,7 +2832,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version 0.4.0", - "syn", + "syn 1.0.102", ] [[package]] @@ -2952,7 +2991,7 @@ checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -2961,6 +3000,7 @@ version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" dependencies = [ + "indexmap", "itoa", "ryu", "serde", @@ -3140,14 +3180,14 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", @@ -3201,7 +3241,7 @@ dependencies = [ "quote", "serde", "serde_derive", - "syn", + "syn 1.0.102", ] [[package]] @@ -3217,7 +3257,7 @@ dependencies = [ "serde_derive", "serde_json", "sha1 0.6.1", - "syn", + "syn 1.0.102", ] [[package]] @@ -3251,7 +3291,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.102", ] [[package]] @@ -3294,6 +3334,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "tar" version = "0.4.38" @@ -3373,7 +3424,7 @@ checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -3447,7 +3498,7 @@ dependencies = [ "proc-macro2", "quote", "standback", - "syn", + "syn 1.0.102", ] [[package]] @@ -3467,31 +3518,30 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.24.1" +version = "1.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d9f76183f91ecfb55e1d7d5602bd1d979e38a3a522fe900241cf195624d67ae" +checksum = "c3c786bf8134e5a3a166db9b29ab8f48134739014a3eca7bc6bfa95d673b136f" dependencies = [ "autocfg", "bytes 1.1.0", "libc", - "memchr", "mio", "num_cpus", "pin-project-lite", "socket2", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -3589,7 +3639,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -3684,7 +3734,7 @@ checksum = "8f9568611f0de5e83e0993b85c54679cd0afd659adcfcb0233f16280b980492e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", ] [[package]] @@ -3807,13 +3857,17 @@ name = "voicevox_core" version = "0.0.0" dependencies = [ "anyhow", + "async_zip", "cfg-if", "derive-getters", "derive-new", "easy-ext", + "flate2", "fs-err", + "futures", "heck", "humansize", + "nanoid", "once_cell", "onnxruntime", "open_jtalk", @@ -3823,8 +3877,10 @@ dependencies = [ "rstest", "serde", "serde_json", + "tar", "test_util", "thiserror", + "tokio", "tracing", "windows", ] @@ -3837,6 +3893,7 @@ dependencies = [ "assert_cmd", "chrono", "clap 4.0.10", + "derive-getters", "duct", "easy-ext", "inventory", @@ -3853,7 +3910,9 @@ dependencies = [ "rstest", "serde", "serde_json", + "test_util", "thiserror", + "tokio", "toml 0.7.2", "tracing-subscriber", "typetag", @@ -3868,11 +3927,13 @@ dependencies = [ "easy-ext", "fs_extra", "log", - "numpy", + "once_cell", "pyo3", + "pyo3-asyncio", "pyo3-log", "serde", "serde_json", + "tokio", "tracing", "voicevox_core", ] @@ -3935,7 +3996,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.102", "wasm-bindgen-shared", ] @@ -3969,7 +4030,7 @@ checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.102", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4066,12 +4127,12 @@ version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.0", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.0", "windows_x86_64_msvc 0.42.0", ] @@ -4094,21 +4155,51 @@ version = "0.42.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - "windows_aarch64_gnullvm", + "windows_aarch64_gnullvm 0.42.0", "windows_aarch64_msvc 0.42.0", "windows_i686_gnu 0.42.0", "windows_i686_msvc 0.42.0", "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm", + "windows_x86_64_gnullvm 0.42.0", "windows_x86_64_msvc 0.42.0", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" @@ -4121,6 +4212,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + [[package]] name = "windows_i686_gnu" version = "0.36.1" @@ -4133,6 +4230,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + [[package]] name = "windows_i686_msvc" version = "0.36.1" @@ -4145,6 +4248,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" @@ -4157,12 +4266,24 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" @@ -4175,6 +4296,12 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + [[package]] name = "winreg" version = "0.10.1" @@ -4204,6 +4331,15 @@ dependencies = [ "fs-err", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yansi" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 94b1e19f0..4a81b36dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,12 +23,14 @@ once_cell = "1.15.0" process_path = { git = "https://github.com/VOICEVOX/process_path.git", rev = "de226a26e8e18edbdb1d6f986afe37bbbf35fbf4" } regex = "1.6.0" serde = { version = "1.0.145", features = ["derive"] } -serde_json = "1.0.85" +serde_json = { version = "1.0.85", features = ["preserve_order"] } test_util = { path = "crates/test_util" } thiserror = "1.0.37" tracing = { version = "0.1.37", features = ["log"] } tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } voicevox_core = { path = "crates/voicevox_core" } +tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "macros", "sync"] } +derive-getters = "0.2.0" # min-sized-rustを元にrelease buildのサイズが小さくなるようにした # https://github.com/johnthagen/min-sized-rust diff --git a/crates/download/Cargo.toml b/crates/download/Cargo.toml index 421e90563..a14a1e066 100644 --- a/crates/download/Cargo.toml +++ b/crates/download/Cargo.toml @@ -21,7 +21,7 @@ platforms = "3.0.2" rayon = "1.6.1" reqwest = { version = "0.11.13", default-features = false, features = ["rustls-tls", "stream"] } strum = { version = "0.24.1", features = ["derive"] } -tokio = { version = "1.24.1", features = ["macros", "rt-multi-thread", "sync"] } +tokio.workspace = true tracing.workspace = true tracing-subscriber.workspace = true url = "2.3.0" diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index 049e46292..4d98fe352 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -12,7 +12,7 @@ directml = ["onnxruntime/directml"] [dependencies] anyhow.workspace = true cfg-if = "1.0.0" -derive-getters = "0.2.0" +derive-getters.workspace = true derive-new = "0.5.9" easy-ext.workspace = true fs-err.workspace = true @@ -25,10 +25,16 @@ thiserror.workspace = true tracing.workspace = true open_jtalk = { git = "https://github.com/VOICEVOX/open_jtalk-rs.git", rev="d766a52bad4ccafe18597e57bd6842f59dca881e" } regex.workspace = true +async_zip = { version = "0.0.11", features = ["full"] } +futures = "0.3.26" +nanoid = "0.4.0" +tokio.workspace = true [dev-dependencies] rstest = "0.15.0" pretty_assertions = "1.3.0" +flate2 = "1.0.24" +tar = "0.4.38" heck = "0.4.0" test_util.workspace = true diff --git a/crates/voicevox_core/src/devices.rs b/crates/voicevox_core/src/devices.rs new file mode 100644 
index 000000000..ceef4b9d3
--- /dev/null
+++ b/crates/voicevox_core/src/devices.rs
@@ -0,0 +1,49 @@
+use serde::{Deserialize, Serialize};
+
+use super::*;
+
+#[derive(Getters, Debug, Serialize, Deserialize)]
+pub struct SupportedDevices {
+    cpu: bool,
+    cuda: bool,
+    dml: bool,
+}
+
+impl SupportedDevices {
+    /// Gets information about the supported devices.
+    pub fn get_supported_devices() -> Result<Self> {
+        let mut cuda_support = false;
+        let mut dml_support = false;
+        for provider in onnxruntime::session::get_available_providers()
+            .map_err(|e| Error::GetSupportedDevices(e.into()))?
+            .iter()
+        {
+            match provider.as_str() {
+                "CUDAExecutionProvider" => cuda_support = true,
+                "DmlExecutionProvider" => dml_support = true,
+                _ => {}
+            }
+        }
+
+        Ok(SupportedDevices {
+            cpu: true,
+            cuda: cuda_support,
+            dml: dml_support,
+        })
+    }
+
+    pub fn to_json(&self) -> serde_json::Value {
+        serde_json::to_value(self).expect("should not fail")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[rstest]
+    fn supported_devices_get_supported_devices_works() {
+        let result = SupportedDevices::get_supported_devices();
+        // The result depends on the environment, so only check that the call succeeds.
+        assert!(result.is_ok(), "{result:?}");
+    }
+}
diff --git a/crates/voicevox_core/src/engine/full_context_label.rs b/crates/voicevox_core/src/engine/full_context_label.rs
index b8e3cce2d..f52b84ade 100644
--- a/crates/voicevox_core/src/engine/full_context_label.rs
+++ b/crates/voicevox_core/src/engine/full_context_label.rs
@@ -306,7 +306,7 @@ impl Utterance {
     }
 
     pub fn extract_full_context_label(
-        open_jtalk: &mut open_jtalk::OpenJtalk,
+        open_jtalk: &open_jtalk::OpenJtalk,
         text: impl AsRef<str>,
     ) -> Result<Self> {
         let labels = open_jtalk.extract_fullcontext(text)?;
diff --git a/crates/voicevox_core/src/engine/open_jtalk.rs b/crates/voicevox_core/src/engine/open_jtalk.rs
index 60c3acbf1..f07fe89cb 100644
--- a/crates/voicevox_core/src/engine/open_jtalk.rs
+++ b/crates/voicevox_core/src/engine/open_jtalk.rs
@@ -1,7 +1,12 @@
-use std::path::{Path, PathBuf};
+use std::{
+    path::{Path, PathBuf},
+    sync::Mutex,
+};
 
 use ::open_jtalk::*;
 
+use crate::Error;
+
 #[derive(thiserror::Error, Debug)]
 pub enum OpenJtalkError {
     #[error("open_jtalk load error")]
@@ -17,55 +22,74 @@ pub enum OpenJtalkError {
 pub type Result<T> = std::result::Result<T, OpenJtalkError>;
 
 pub struct OpenJtalk {
+    resources: Mutex<Resources>,
+    dict_loaded: bool,
+}
+
+struct Resources {
     mecab: ManagedResource<Mecab>,
     njd: ManagedResource<Njd>,
     jpcommon: ManagedResource<JpCommon>,
-    dict_loaded: bool,
 }
 
+#[allow(unsafe_code)]
+unsafe impl Send for Resources {}
+
 impl OpenJtalk {
-    pub fn initialize() -> Self {
+    pub fn new_without_dic() -> Self {
         Self {
-            mecab: ManagedResource::initialize(),
-            njd: ManagedResource::initialize(),
-            jpcommon: ManagedResource::initialize(),
+            resources: Mutex::new(Resources {
+                mecab: ManagedResource::initialize(),
+                njd: ManagedResource::initialize(),
+                jpcommon: ManagedResource::initialize(),
+            }),
             dict_loaded: false,
         }
     }
-
-    pub fn extract_fullcontext(&mut self, text: impl AsRef<str>) -> Result<Vec<String>> {
-        let result = self.extract_fullcontext_non_reflesh(text);
-        self.jpcommon.refresh();
-        self.njd.refresh();
-        self.mecab.refresh();
-        result
+    pub fn new_with_initialize(
+        open_jtalk_dict_dir: impl AsRef<Path>,
+    ) -> crate::result::Result<Self> {
+        let mut s = Self::new_without_dic();
+        s.load(open_jtalk_dict_dir)
+            .map_err(|_| Error::NotLoadedOpenjtalkDict)?;
+        Ok(s)
     }
 
-    fn extract_fullcontext_non_reflesh(&mut self, text: impl AsRef<str>) -> Result<Vec<String>> {
+    pub fn extract_fullcontext(&self, text: impl AsRef<str>) -> Result<Vec<String>> {
+        let Resources {
+            mecab,
+            njd,
+            jpcommon,
+        } = &mut *self.resources.lock().unwrap();
+
+        jpcommon.refresh();
+        njd.refresh();
+        mecab.refresh();
+
         let mecab_text = text2mecab(text.as_ref()).map_err(|e| OpenJtalkError::ExtractFullContext {
             text: text.as_ref().into(),
             source: Some(e.into()),
         })?;
-        if self.mecab.analysis(mecab_text) {
-            self.njd.mecab2njd(
-                self.mecab
+        if mecab.analysis(mecab_text) {
+            njd.mecab2njd(
+                mecab
                     .get_feature()
                     .ok_or(OpenJtalkError::ExtractFullContext {
                         text: text.as_ref().into(),
                         source: None,
                     })?,
-                self.mecab.get_size(),
+                mecab.get_size(),
             );
-            self.njd.set_pronunciation();
-            self.njd.set_digit();
-            self.njd.set_accent_phrase();
-            self.njd.set_accent_type();
-            self.njd.set_unvoiced_vowel();
-            self.njd.set_long_vowel();
-            self.jpcommon.njd2jpcommon(&self.njd);
-            self.jpcommon.make_label();
-            self.jpcommon
+            njd.set_pronunciation();
+            njd.set_digit();
+            njd.set_accent_phrase();
+            njd.set_accent_type();
+            njd.set_unvoiced_vowel();
+            njd.set_long_vowel();
+            jpcommon.njd2jpcommon(njd);
+            jpcommon.make_label();
+            jpcommon
                 .get_label_feature_to_iter()
                 .ok_or_else(|| OpenJtalkError::ExtractFullContext {
                     text: text.as_ref().into(),
                     source: None,
@@ -80,15 +104,20 @@ impl OpenJtalk {
         }
     }
 
-    pub fn load(&mut self, mecab_dict_dir: impl AsRef<Path>) -> Result<()> {
-        let result = self.mecab.load(mecab_dict_dir.as_ref());
+    fn load(&mut self, open_jtalk_dict_dir: impl AsRef<Path>) -> Result<()> {
+        let result = self
+            .resources
+            .lock()
+            .unwrap()
+            .mecab
+            .load(open_jtalk_dict_dir.as_ref());
         if result {
             self.dict_loaded = true;
             Ok(())
         } else {
             self.dict_loaded = false;
             Err(OpenJtalkError::Load {
-                mecab_dict_dir: mecab_dict_dir.as_ref().into(),
+                mecab_dict_dir: open_jtalk_dict_dir.as_ref().into(),
             })
         }
     }
@@ -101,7 +130,7 @@ impl OpenJtalk {
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use test_util::OPEN_JTALK_DIC_DIR;
+    use ::test_util::OPEN_JTALK_DIC_DIR;
 
     use crate::{macros::tests::assert_debug_fmt_eq, *};
 
@@ -196,8 +225,7 @@ mod tests {
     #[case("",Err(OpenJtalkError::ExtractFullContext{text:"".into(),source:None}))]
    #[case("こんにちは、ヒホです。", Ok(testdata_hello_hiho()))]
     fn extract_fullcontext_works(#[case] text: &str, #[case] expected: super::Result<Vec<String>>) {
-        let mut open_jtalk = OpenJtalk::initialize();
-        open_jtalk.load(OPEN_JTALK_DIC_DIR).unwrap();
+        let open_jtalk = OpenJtalk::new_with_initialize(OPEN_JTALK_DIC_DIR).unwrap();
         let result = open_jtalk.extract_fullcontext(text);
         assert_debug_fmt_eq!(expected, result);
     }
@@ -208,8 +236,7 @@ mod tests {
         #[case] text: &str,
         #[case] expected: super::Result<Vec<String>>,
     ) {
-        let mut open_jtalk = OpenJtalk::initialize();
-        open_jtalk.load(OPEN_JTALK_DIC_DIR).unwrap();
+        let open_jtalk = OpenJtalk::new_with_initialize(OPEN_JTALK_DIC_DIR).unwrap();
         for _ in 0..10 {
             let result = open_jtalk.extract_fullcontext(text);
             assert_debug_fmt_eq!(expected, result);
diff --git a/crates/voicevox_core/src/engine/synthesis_engine.rs b/crates/voicevox_core/src/engine/synthesis_engine.rs
index b16271a50..0adce4fe2 100644
--- a/crates/voicevox_core/src/engine/synthesis_engine.rs
+++ b/crates/voicevox_core/src/engine/synthesis_engine.rs
@@ -1,6 +1,6 @@
 use derive_new::new;
 use std::io::{Cursor, Write};
-use std::path::Path;
+use std::sync::Arc;
 
 use super::full_context_label::Utterance;
 use super::open_jtalk::OpenJtalk;
@@ -17,7 +17,7 @@ const MORA_PHONEME_LIST: &[&str] = &[
 #[derive(new)]
 pub struct SynthesisEngine {
     inference_core: InferenceCore,
-    open_jtalk: OpenJtalk,
+    open_jtalk: Arc<OpenJtalk>,
 }
 
 #[allow(unsafe_code)]
@@ -34,16 +34,16 @@ impl SynthesisEngine {
         &mut self.inference_core
     }
 
-    pub fn create_accent_phrases(
-        &mut self,
-        text: impl AsRef<str>,
-        speaker_id: u32,
+    pub async fn create_accent_phrases(
+        &self,
+        text: &str,
+        style_id: StyleId,
     ) -> Result<Vec<AccentPhraseModel>> {
-        if text.as_ref().is_empty() {
+        if text.is_empty() {
             return Ok(Vec::new());
         }
 
-        let utterance = Utterance::extract_full_context_label(&mut self.open_jtalk, text.as_ref())?;
+        let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?;
 
         let accent_phrases: Vec<AccentPhraseModel> = utterance
             .breath_groups()
@@ -108,22 +108,24 @@
                 accum_vec
             });
 
-        self.replace_mora_data(&accent_phrases, speaker_id)
+        self.replace_mora_data(&accent_phrases, style_id).await
     }
 
-    pub fn replace_mora_data(
-        &mut self,
+    pub async fn replace_mora_data(
+        &self,
         accent_phrases: &[AccentPhraseModel],
-        speaker_id: u32,
+        style_id: StyleId,
     ) -> Result<Vec<AccentPhraseModel>> {
-        let accent_phrases = self.replace_phoneme_length(accent_phrases, speaker_id)?;
-        self.replace_mora_pitch(&accent_phrases, speaker_id)
+        let accent_phrases = self
+            .replace_phoneme_length(accent_phrases, style_id)
+            .await?;
+        self.replace_mora_pitch(&accent_phrases, style_id).await
     }
 
-    pub fn replace_phoneme_length(
-        &mut self,
+    pub async fn replace_phoneme_length(
+        &self,
         accent_phrases: &[AccentPhraseModel],
-        speaker_id: u32,
+        style_id: StyleId,
     ) -> Result<Vec<AccentPhraseModel>> {
         let (_, phoneme_data_list) = SynthesisEngine::initial_process(accent_phrases);
 
@@ -134,8 +136,9 @@
             .map(|phoneme_data| phoneme_data.phoneme_id())
             .collect();
         let phoneme_length = self
-            .inference_core_mut()
-            .predict_duration(&phoneme_list_s, speaker_id)?;
+            .inference_core()
+            .predict_duration(&phoneme_list_s, style_id)
+            .await?;
 
         let mut index = 0;
         let new_accent_phrases = accent_phrases
@@ -181,10 +184,10 @@
         Ok(new_accent_phrases)
     }
 
-    pub fn replace_mora_pitch(
-        &mut self,
+    pub async fn replace_mora_pitch(
+        &self,
         accent_phrases: &[AccentPhraseModel],
-        speaker_id: u32,
+        style_id: StyleId,
     ) -> Result<Vec<AccentPhraseModel>> {
         let (_, phoneme_data_list) = SynthesisEngine::initial_process(accent_phrases);
 
@@ -246,16 +249,19 @@
             end_accent_phrase_list.push(base_end_accent_phrase_list[vowel_index as usize]);
         }
 
-        let mut f0_list = self.inference_core_mut().predict_intonation(
-            vowel_phoneme_list.len(),
-            &vowel_phoneme_list,
-            &consonant_phoneme_list,
-            &start_accent_list,
-            &end_accent_list,
-            &start_accent_phrase_list,
-            &end_accent_phrase_list,
-            speaker_id,
-        )?;
+        let mut f0_list = self
+            .inference_core()
+            .predict_intonation(
+                vowel_phoneme_list.len(),
+                &vowel_phoneme_list,
+                &consonant_phoneme_list,
+                &start_accent_list,
+                &end_accent_list,
+                &start_accent_phrase_list,
+                &end_accent_phrase_list,
+                style_id,
+            )
+            .await?;
 
         for i in 0..vowel_phoneme_data_list.len() {
             if UNVOICED_MORA_PHONEME_LIST
@@ -308,10 +314,10 @@
         Ok(new_accent_phrases)
     }
 
-    pub fn synthesis(
-        &mut self,
+    pub async fn synthesis(
+        &self,
         query: &AudioQueryModel,
-        speaker_id: u32,
+        style_id: StyleId,
         enable_interrogative_upspeak: bool,
     ) -> Result<Vec<f32>> {
         let speed_scale = *query.speed_scale();
@@ -409,28 +415,32 @@
         // Flatten the 2D vector into 1D so that its elements are contiguous in memory.
         let flatten_phoneme = phoneme.into_iter().flatten().collect::<Vec<f32>>();
 
-        self.inference_core_mut().decode(
-            f0.len(),
-            OjtPhoneme::num_phoneme(),
-            &f0,
-            &flatten_phoneme,
-            speaker_id,
-        )
+        self.inference_core()
+            .decode(
+                f0.len(),
+                OjtPhoneme::num_phoneme(),
+                &f0,
+                &flatten_phoneme,
+                style_id,
+            )
+            .await
     }
 
-    pub fn synthesis_wave_format(
-        &mut self,
+    pub async fn synthesis_wave_format(
+        &self,
         query: &AudioQueryModel,
-        speaker_id: u32,
+        style_id: StyleId,
         enable_interrogative_upspeak: bool,
     ) -> Result<Vec<u8>> {
-        let wave = self.synthesis(query, speaker_id, enable_interrogative_upspeak)?;
-
+        let wave = self
+            .synthesis(query, style_id, enable_interrogative_upspeak)
+            .await?;
         let volume_scale = *query.volume_scale();
         let output_stereo = *query.output_stereo();
-        // TODO: support 44.1kHz etc.
         let output_sampling_rate = *query.output_sampling_rate();
+        // TODO: support 44.1kHz etc.
+
         let num_channels: u16 = if output_stereo { 2 } else { 1 };
         let bit_depth: u16 = 16;
         let repeat_count: u32 =
@@ -470,12 +480,6 @@
         Ok(cur.into_inner())
     }
 
-    pub fn load_openjtalk_dict(&mut self, mecab_dict_dir: impl AsRef<Path>) -> Result<()> {
-        self.open_jtalk
-            .load(mecab_dict_dir)
-            .map_err(|_| Error::NotLoadedOpenjtalkDict)
-    }
-
     pub fn is_openjtalk_dict_loaded(&self) -> bool {
         self.open_jtalk.dict_loaded()
     }
@@ -644,44 +648,43 @@ fn make_interrogative_mora(last_mora: &MoraModel) -> MoraModel {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use ::test_util::OPEN_JTALK_DIC_DIR;
     use pretty_assertions::assert_eq;
-    use test_util::OPEN_JTALK_DIC_DIR;
 
-    use crate::{macros::tests::assert_debug_fmt_eq, *};
+    use crate::*;
 
     #[rstest]
-    fn load_openjtalk_dict_works() {
-        let core = InferenceCore::new(false, None);
-        let mut synthesis_engine = SynthesisEngine::new(core, OpenJtalk::initialize());
-
-        let result = synthesis_engine.load_openjtalk_dict(OPEN_JTALK_DIC_DIR);
-        assert_debug_fmt_eq!(result, Ok(()));
-
-        let result = synthesis_engine.load_openjtalk_dict("");
-        assert_debug_fmt_eq!(result, Err(Error::NotLoadedOpenjtalkDict));
-    }
-
-    #[rstest]
-    fn is_openjtalk_dict_loaded_works() {
-        let core = InferenceCore::new(false, None);
-        let mut synthesis_engine = SynthesisEngine::new(core, OpenJtalk::initialize());
+    #[tokio::test]
+    async fn is_openjtalk_dict_loaded_works() {
+        let core = InferenceCore::new_with_initialize(false, 0, false)
+            .await
+            .unwrap();
+        let synthesis_engine = SynthesisEngine::new(
+            core,
+            OpenJtalk::new_with_initialize(OPEN_JTALK_DIC_DIR)
+                .unwrap()
+                .into(),
+        );
 
-        let _ = synthesis_engine.load_openjtalk_dict(OPEN_JTALK_DIC_DIR);
         assert_eq!(synthesis_engine.is_openjtalk_dict_loaded(), true);
-
-        let _ = synthesis_engine.load_openjtalk_dict("");
-        assert_eq!(synthesis_engine.is_openjtalk_dict_loaded(), false);
     }
 
     #[rstest]
-    fn create_accent_phrases_works() {
-        let mut core = InferenceCore::new(true, None);
-        core.initialize(false, 0, true).unwrap();
-        let mut synthesis_engine = SynthesisEngine::new(core, OpenJtalk::initialize());
+    #[tokio::test]
+    async fn create_accent_phrases_works() {
+        let core = InferenceCore::new_with_initialize(false, 0, true)
+            .await
+            .unwrap();
+        let synthesis_engine = SynthesisEngine::new(
+            core,
+            OpenJtalk::new_with_initialize(OPEN_JTALK_DIC_DIR)
+                .unwrap()
+                .into(),
+        );
 
-        let _ = synthesis_engine.load_openjtalk_dict(OPEN_JTALK_DIC_DIR);
         let accent_phrases = synthesis_engine
-            .create_accent_phrases("同じ、文章、です。完全に、同一です。", 0)
+            .create_accent_phrases("同じ、文章、です。完全に、同一です。", StyleId::new(1))
+            .await
             .unwrap();
 
         assert_eq!(accent_phrases.len(), 5);
diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs
index f4e3f882b..6c4916412 100644
--- a/crates/voicevox_core/src/error.rs
+++ b/crates/voicevox_core/src/error.rs
@@ -30,6 +30,25 @@ pub enum Error {
         #[source]
         source: anyhow::Error,
     },
+    #[error("{} ({})", base_error_message(VOICEVOX_ALREADY_LOADED_MODEL_ERROR), path.display())]
+    AlreadyLoadedModel { path: PathBuf },
+
({model_id:?})", base_error_message(VOICEVOX_UNLOADED_MODEL_ERROR))] + UnloadedModel { model_id: VoiceModelId }, + + #[error("{}({path}):{source}", base_error_message(VOICEVOX_OPEN_FILE_ERROR))] + OpenFile { + path: PathBuf, + #[source] + source: anyhow::Error, + }, + + #[error("{},{filename}", base_error_message(VOICEVOX_VVM_MODEL_READ_ERROR))] + VvmRead { + filename: String, + #[source] + source: Option, + }, #[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_METAS_ERROR))] LoadMetas(#[source] anyhow::Error), @@ -40,14 +59,11 @@ pub enum Error { )] GetSupportedDevices(#[source] anyhow::Error), - #[error("{}", base_error_message(VOICEVOX_RESULT_UNINITIALIZED_STATUS_ERROR))] - UninitializedStatus, - #[error( - "{}: {speaker_id}", - base_error_message(VOICEVOX_RESULT_INVALID_SPEAKER_ID_ERROR) + "{}: {style_id:?}", + base_error_message(VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR) )] - InvalidSpeakerId { speaker_id: u32 }, + InvalidStyleId { style_id: StyleId }, #[error( "{}: {model_index}", @@ -69,6 +85,6 @@ pub enum Error { } fn base_error_message(result_code: VoicevoxResultCode) -> &'static str { - let c_message: &'static str = crate::error_result_to_message(result_code); + let c_message: &'static str = crate::result_code::error_result_to_message(result_code); &c_message[..(c_message.len() - 1)] } diff --git a/crates/voicevox_core/src/inference_core.rs b/crates/voicevox_core/src/inference_core.rs new file mode 100644 index 000000000..e22269486 --- /dev/null +++ b/crates/voicevox_core/src/inference_core.rs @@ -0,0 +1,231 @@ +use self::status::*; +use super::*; +use onnxruntime::{ + ndarray, + session::{AnyArray, NdArray}, +}; + +const PHONEME_LENGTH_MINIMAL: f32 = 0.01; + +pub struct InferenceCore { + status: Status, +} + +impl InferenceCore { + pub(crate) async fn new_with_initialize( + use_gpu: bool, + cpu_num_threads: u16, + load_all_models: bool, + ) -> Result { + if !use_gpu || Self::can_support_gpu_feature()? { + let mut status = Status::new(use_gpu, cpu_num_threads); + + if load_all_models { + for model in &VoiceModel::get_all_models().await? { + status.load_model(model).await?; + } + } + Ok(Self { status }) + } else { + Err(Error::GpuSupport) + } + } + + fn can_support_gpu_feature() -> Result { + let supported_devices = SupportedDevices::get_supported_devices()?; + + cfg_if! 
+            if #[cfg(feature = "directml")]{
+                Ok(*supported_devices.dml())
+            } else{
+                Ok(*supported_devices.cuda())
+            }
+        }
+    }
+
+    pub async fn load_model(&mut self, model: &VoiceModel) -> Result<()> {
+        self.status.load_model(model).await
+    }
+
+    pub fn unload_model(&mut self, voice_model_id: &VoiceModelId) -> Result<()> {
+        self.status.unload_model(voice_model_id)
+    }
+    pub fn metas(&self) -> &VoiceModelMeta {
+        self.status.metas()
+    }
+
+    pub fn is_loaded_model(&self, model_id: &VoiceModelId) -> bool {
+        self.status.is_loaded_model(model_id)
+    }
+
+    pub fn is_model_loaded_by_style_id(&self, style_id: StyleId) -> bool {
+        self.status.is_loaded_model_by_style_id(style_id)
+    }
+
+    pub async fn predict_duration(
+        &self,
+        phoneme_vector: &[i64],
+        style_id: StyleId,
+    ) -> Result<Vec<f32>> {
+        if !self.status.validate_speaker_id(style_id) {
+            return Err(Error::InvalidStyleId { style_id });
+        }
+
+        let mut phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
+        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));
+
+        let input_tensors: Vec<&mut dyn AnyArray> =
+            vec![&mut phoneme_vector_array, &mut speaker_id_array];
+
+        let mut output = self
+            .status
+            .predict_duration_session_run(style_id, input_tensors)?;
+
+        for output_item in output.iter_mut() {
+            if *output_item < PHONEME_LENGTH_MINIMAL {
+                *output_item = PHONEME_LENGTH_MINIMAL;
+            }
+        }
+
+        Ok(output)
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub async fn predict_intonation(
+        &self,
+        length: usize,
+        vowel_phoneme_vector: &[i64],
+        consonant_phoneme_vector: &[i64],
+        start_accent_vector: &[i64],
+        end_accent_vector: &[i64],
+        start_accent_phrase_vector: &[i64],
+        end_accent_phrase_vector: &[i64],
+        style_id: StyleId,
+    ) -> Result<Vec<f32>> {
+        if !self.status.validate_speaker_id(style_id) {
+            return Err(Error::InvalidStyleId { style_id });
+        }
+
+        let mut length_array = NdArray::new(ndarray::arr0(length as i64));
+        let mut vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
+        let mut consonant_phoneme_vector_array =
+            NdArray::new(ndarray::arr1(consonant_phoneme_vector));
+        let mut start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
+        let mut end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
+        let mut start_accent_phrase_vector_array =
+            NdArray::new(ndarray::arr1(start_accent_phrase_vector));
+        let mut end_accent_phrase_vector_array =
+            NdArray::new(ndarray::arr1(end_accent_phrase_vector));
+        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));
+
+        let input_tensors: Vec<&mut dyn AnyArray> = vec![
+            &mut length_array,
+            &mut vowel_phoneme_vector_array,
+            &mut consonant_phoneme_vector_array,
+            &mut start_accent_vector_array,
+            &mut end_accent_vector_array,
+            &mut start_accent_phrase_vector_array,
+            &mut end_accent_phrase_vector_array,
+            &mut speaker_id_array,
+        ];
+
+        self.status
+            .predict_intonation_session_run(style_id, input_tensors)
+    }
+
+    pub async fn decode(
+        &self,
+        length: usize,
+        phoneme_size: usize,
+        f0: &[f32],
+        phoneme_vector: &[f32],
+        style_id: StyleId,
+    ) -> Result<Vec<f32>> {
+        if !self.status.validate_speaker_id(style_id) {
+            return Err(Error::InvalidStyleId { style_id });
+        }
+
+        // A workaround is in place here to keep the audio from cutting out.
+        // TODO: remove this padding logic once the underlying issue is fixed
+        const PADDING_SIZE: f64 = 0.4;
+        const DEFAULT_SAMPLING_RATE: f64 = 24000.0;
+        let padding_size = ((PADDING_SIZE * DEFAULT_SAMPLING_RATE) / 256.0).round() as usize;
+        let start_and_end_padding_size = 2 * padding_size;
+        let length_with_padding = length + start_and_end_padding_size;
+        let f0_with_padding = Self::make_f0_with_padding(f0, length_with_padding, padding_size);
+
+        let phoneme_with_padding = Self::make_phoneme_with_padding(
+            phoneme_vector,
+            phoneme_size,
+            length_with_padding,
+            padding_size,
+        );
+
+        let mut f0_array = NdArray::new(
+            ndarray::arr1(&f0_with_padding)
+                .into_shape([length_with_padding, 1])
+                .unwrap(),
+        );
+        let mut phoneme_array = NdArray::new(
+            ndarray::arr1(&phoneme_with_padding)
+                .into_shape([length_with_padding, phoneme_size])
+                .unwrap(),
+        );
+        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[style_id.raw_id() as i64]));
+
+        let input_tensors: Vec<&mut dyn AnyArray> =
+            vec![&mut f0_array, &mut phoneme_array, &mut speaker_id_array];
+
+        self.status
+            .decode_session_run(style_id, input_tensors)
+            .map(|output| Self::trim_padding_from_output(output, padding_size))
+    }
+
+    fn make_f0_with_padding(
+        f0_slice: &[f32],
+        length_with_padding: usize,
+        padding_size: usize,
+    ) -> Vec<f32> {
+        // Workaround to keep the audio from cutting out.
+        // Delete this function once the underlying issue is fixed.
+        let mut f0_with_padding = Vec::with_capacity(length_with_padding);
+        let padding = vec![0.0; padding_size];
+        f0_with_padding.extend_from_slice(&padding);
+        f0_with_padding.extend_from_slice(f0_slice);
+        f0_with_padding.extend_from_slice(&padding);
+        f0_with_padding
+    }
+
+    fn make_phoneme_with_padding(
+        phoneme_slice: &[f32],
+        phoneme_size: usize,
+        length_with_padding: usize,
+        padding_size: usize,
+    ) -> Vec<f32> {
+        // Workaround to keep the audio from cutting out.
+        // Delete this function once the underlying issue is fixed.
+        let mut padding_phoneme = vec![0.0; phoneme_size];
+        padding_phoneme[0] = 1.0;
+        let padding_phoneme_len = padding_phoneme.len();
+        let padding_phonemes: Vec<f32> = padding_phoneme
+            .into_iter()
+            .cycle()
+            .take(padding_phoneme_len * padding_size)
+            .collect();
+        let mut phoneme_with_padding = Vec::with_capacity(phoneme_size * length_with_padding);
+        phoneme_with_padding.extend_from_slice(&padding_phonemes);
+        phoneme_with_padding.extend_from_slice(phoneme_slice);
+        phoneme_with_padding.extend_from_slice(&padding_phonemes);
+
+        phoneme_with_padding
+    }
+
+    fn trim_padding_from_output(mut output: Vec<f32>, padding_f0_size: usize) -> Vec<f32> {
+        // Workaround to keep the audio from cutting out.
+        // Delete this function once the underlying issue is fixed.
+        let padding_sampling_size = padding_f0_size * 256;
+        output
+            .drain(padding_sampling_size..output.len() - padding_sampling_size)
+            .collect()
+    }
+}
diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs
index aa1cb021c..ec8928d79 100644
--- a/crates/voicevox_core/src/lib.rs
+++ b/crates/voicevox_core/src/lib.rs
@@ -1,23 +1,42 @@
 #![deny(unsafe_code)]
 
+mod devices;
 /// cbindgen:ignore
 mod engine;
 mod error;
+mod inference_core;
 mod macros;
+mod manifest;
+mod metas;
 mod numerics;
-mod publish;
 mod result;
 pub mod result_code;
 mod status;
+mod version;
+mod voice_model;
+mod voice_synthesizer;
 
-pub use self::publish::*;
+use self::inference_core::*;
 
-pub use self::engine::{AccentPhraseModel, AudioQueryModel};
+#[cfg(test)]
+mod test_util;
+
+#[cfg(test)]
+use self::test_util::*;
+
+pub use self::engine::{AccentPhraseModel, AudioQueryModel, OpenJtalk};
 pub use self::error::*;
+pub use self::metas::*;
 pub use self::result::*;
+pub use self::voice_model::*;
+pub use devices::*;
+pub use manifest::*;
+pub use version::*;
+pub use voice_synthesizer::*;
 
 use derive_getters::*;
 use derive_new::new;
+use nanoid::nanoid;
 #[cfg(test)]
 use rstest::*;
diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs
new file mode 100644
index 000000000..4e9ccd158
--- /dev/null
+++ b/crates/voicevox_core/src/manifest.rs
@@ -0,0 +1,30 @@
+use std::fmt::Display;
+
+use derive_getters::Getters;
+use derive_new::new;
+use serde::Deserialize;
+
+pub type RawManifestVersion = String;
+#[derive(Deserialize, Clone, Debug, PartialEq, new)]
+pub struct ManifestVersion(RawManifestVersion);
+
+impl ManifestVersion {
+    pub fn raw_manifest_version(&self) -> &RawManifestVersion {
+        &self.0
+    }
+}
+
+impl Display for ManifestVersion {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+#[derive(Deserialize, Getters, Clone)]
+pub struct Manifest {
+    manifest_version: ManifestVersion,
+    metas_filename: String,
+    decode_filename: String,
+    predict_duration_filename: String,
+    predict_intonation_filename: String,
+}
diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs
new file mode 100644
index 000000000..2ee336906
--- /dev/null
+++ b/crates/voicevox_core/src/metas.rs
@@ -0,0 +1,59 @@
+use std::fmt::Display;
+
+use super::*;
+use derive_getters::Getters;
+use serde::{Deserialize, Serialize};
+
+/// The underlying type of a style ID.
+pub type RawStyleId = u32;
+/// Style ID.
+#[derive(PartialEq, Eq, Clone, Copy, Ord, PartialOrd, Deserialize, Serialize, new, Debug)]
+pub struct StyleId(RawStyleId);
+
+impl StyleId {
+    pub fn raw_id(self) -> RawStyleId {
+        self.0
+    }
+}
+
+impl Display for StyleId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.raw_id())
+    }
+}
+
+pub type RawStyleVersion = String;
+
+#[derive(PartialEq, Eq, Clone, Ord, PartialOrd, Deserialize, Serialize, new, Debug)]
+pub struct StyleVersion(RawStyleVersion);
+
+impl StyleVersion {
+    pub fn raw_version(&self) -> &RawStyleVersion {
+        &self.0
+    }
+}
+
+impl Display for StyleVersion {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.raw_version())
+    }
+}
+
+/// Meta information for a voice synthesis model.
+pub type VoiceModelMeta = Vec<SpeakerMeta>;
+
+/// Meta information for a speaker.
+#[derive(Deserialize, Serialize, Getters, Clone)]
+pub struct SpeakerMeta {
+    name: String,
+    styles: Vec<StyleMeta>,
+    version: StyleVersion,
+    speaker_uuid: String,
+}
+
+/// Meta information for a style.
+#[derive(Deserialize, Serialize, Getters, Clone)]
+pub struct StyleMeta {
+    id: StyleId,
+    name: String,
+}
diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs
deleted file mode 100644
index 50db061fe..000000000
--- a/crates/voicevox_core/src/publish.rs
+++ /dev/null
@@ -1,1259 +0,0 @@
-use self::engine::*;
-use self::result_code::VoicevoxResultCode;
-use self::status::*;
-use super::*;
-use once_cell::sync::Lazy;
-use onnxruntime::{
-    ndarray,
-    session::{AnyArray, NdArray},
-};
-use std::ffi::{CStr, CString};
-use std::path::PathBuf;
-use std::sync::Mutex;
-
-const PHONEME_LENGTH_MINIMAL: f32 = 0.01;
-
-pub struct VoicevoxCore {
-    synthesis_engine: SynthesisEngine,
-    use_gpu: bool,
-}
-
-impl VoicevoxCore {
-    pub fn new_with_initialize(options: InitializeOptions) -> Result<Self> {
-        let mut this = Self::new();
-        this.initialize(options)?;
-        Ok(this)
-    }
-
-    pub fn new_with_mutex() -> Mutex<VoicevoxCore> {
-        Mutex::new(Self::new())
-    }
-
-    fn new() -> Self {
-        #[cfg(windows)]
-        list_windows_video_cards();
-
-        Self {
-            synthesis_engine: SynthesisEngine::new(
-                InferenceCore::new(false, None),
-                OpenJtalk::initialize(),
-            ),
-            use_gpu: false,
-        }
-    }
-
-    pub fn initialize(&mut self, options: InitializeOptions) -> Result<()> {
-        let use_gpu = match options.acceleration_mode {
-            AccelerationMode::Auto => {
-                let supported_devices = SupportedDevices::get_supported_devices()?;
-
-                cfg_if! {
-                    if #[cfg(feature="directml")]{
-                        *supported_devices.dml()
-
-                    } else {
-                        *supported_devices.cuda()
-                    }
-                }
-            }
-            AccelerationMode::Cpu => false,
-            AccelerationMode::Gpu => true,
-        };
-        self.use_gpu = use_gpu;
-        self.synthesis_engine.inference_core_mut().initialize(
-            use_gpu,
-            options.cpu_num_threads,
-            options.load_all_models,
-        )?;
-        if let Some(open_jtalk_dict_dir) = options.open_jtalk_dict_dir {
-            self.synthesis_engine
-                .load_openjtalk_dict(open_jtalk_dict_dir)?;
-        }
-        Ok(())
-    }
-
-    pub fn is_gpu_mode(&self) -> bool {
-        self.use_gpu
-    }
-
-    pub fn load_model(&mut self, speaker_id: u32) -> Result<()> {
-        self.synthesis_engine
-            .inference_core_mut()
-            .load_model(speaker_id)
-    }
-
-    pub fn is_model_loaded(&self, speaker_id: u32) -> bool {
-        self.synthesis_engine
-            .inference_core()
-            .is_model_loaded(speaker_id)
-    }
-
-    pub fn finalize(&mut self) {
-        self.synthesis_engine.inference_core_mut().finalize()
-    }
-
-    pub const fn get_version() -> &'static str {
-        env!("CARGO_PKG_VERSION")
-    }
-
-    pub fn get_metas_json(&self) -> &'static CStr {
-        &METAS_CSTRING
-    }
-
-    pub fn get_supported_devices_json(&self) -> &'static CStr {
-        &SUPPORTED_DEVICES_CSTRING
-    }
-
-    pub fn predict_duration(
-        &mut self,
-        phoneme_vector: &[i64],
-        speaker_id: u32,
-    ) -> Result<Vec<f32>> {
-        self.synthesis_engine
-            .inference_core_mut()
-            .predict_duration(phoneme_vector, speaker_id)
-    }
-
-    #[allow(clippy::too_many_arguments)]
-    pub fn predict_intonation(
-        &mut self,
-        length: usize,
-        vowel_phoneme_vector: &[i64],
-        consonant_phoneme_vector: &[i64],
-        start_accent_vector: &[i64],
-        end_accent_vector: &[i64],
-        start_accent_phrase_vector: &[i64],
-        end_accent_phrase_vector: &[i64],
-        speaker_id: u32,
-    ) -> Result<Vec<f32>> {
-        self.synthesis_engine
-            .inference_core_mut()
-            .predict_intonation(
-                length,
-                vowel_phoneme_vector,
-                consonant_phoneme_vector,
-                start_accent_vector,
-                end_accent_vector,
-                start_accent_phrase_vector,
-                end_accent_phrase_vector,
-                speaker_id,
-            )
-    }
-
-    pub fn decode(
-        &mut self,
-        length: usize,
-        phoneme_size: usize,
-        f0: &[f32],
-        phoneme_vector: &[f32],
-        speaker_id: u32,
-    ) -> Result<Vec<f32>> {
-        self.synthesis_engine.inference_core_mut().decode(
-            length,
-            phoneme_size,
-            f0,
-            phoneme_vector,
-            speaker_id,
-        )
-    }
-
-    pub fn audio_query(
-        &mut self,
-        text: &str,
-        speaker_id: u32,
-        options: AudioQueryOptions,
-    ) -> Result<AudioQueryModel> {
-        let accent_phrases = self.accent_phrases(
-            text,
-            speaker_id,
-            AccentPhrasesOptions { kana: options.kana },
-        )?;
-        let kana = create_kana(&accent_phrases);
-
-        Ok(AudioQueryModel::new(
-            accent_phrases,
-            1.,
-            0.,
-            1.,
-            1.,
-            0.1,
-            0.1,
-            SynthesisEngine::DEFAULT_SAMPLING_RATE,
-            false,
-            Some(kana),
-        ))
-    }
-
-    pub fn accent_phrases(
-        &mut self,
-        text: &str,
-        speaker_id: u32,
-        options: AccentPhrasesOptions,
-    ) -> Result<Vec<AccentPhraseModel>> {
-        if !self.synthesis_engine.is_openjtalk_dict_loaded() {
-            return Err(Error::NotLoadedOpenjtalkDict);
-        }
-
-        let accent_phrases = if options.kana {
-            self.synthesis_engine
-                .replace_mora_data(&parse_kana(text)?, speaker_id)?
-        } else {
-            self.synthesis_engine
-                .create_accent_phrases(text, speaker_id)?
-        };
-
-        Ok(accent_phrases)
-    }
-
-    pub fn mora_length(
-        &mut self,
-        speaker_id: u32,
-        accent_phrases: &[AccentPhraseModel],
-    ) -> Result<Vec<AccentPhraseModel>> {
-        let accent_phrases = self
-            .synthesis_engine
-            .replace_phoneme_length(accent_phrases, speaker_id)?;
-
-        Ok(accent_phrases)
-    }
-
-    pub fn mora_pitch(
-        &mut self,
-        speaker_id: u32,
-        accent_phrases: &[AccentPhraseModel],
-    ) -> Result<Vec<AccentPhraseModel>> {
-        let accent_phrases = self
-            .synthesis_engine
-            .replace_mora_pitch(accent_phrases, speaker_id)?;
-
-        Ok(accent_phrases)
-    }
-
-    pub fn mora_data(
-        &mut self,
-        speaker_id: u32,
-        accent_phrases: &[AccentPhraseModel],
-    ) -> Result<Vec<AccentPhraseModel>> {
-        let accent_phrases = self
-            .synthesis_engine
-            .replace_mora_data(accent_phrases, speaker_id)?;
-
-        Ok(accent_phrases)
-    }
-
-    pub fn synthesis(
-        &mut self,
-        audio_query: &AudioQueryModel,
-        speaker_id: u32,
-        options: SynthesisOptions,
-    ) -> Result<Vec<u8>> {
-        self.synthesis_engine.synthesis_wave_format(
-            audio_query,
-            speaker_id,
-            options.enable_interrogative_upspeak,
-        )
-    }
-
-    pub fn tts(&mut self, text: &str, speaker_id: u32, options: TtsOptions) -> Result<Vec<u8>> {
-        let audio_query = &self.audio_query(text, speaker_id, AudioQueryOptions::from(&options))?;
-        self.synthesis(audio_query, speaker_id, SynthesisOptions::from(&options))
-    }
-}
-
-#[derive(Default)]
-pub struct AudioQueryOptions {
-    pub kana: bool,
-}
-
-impl From<&TtsOptions> for AudioQueryOptions {
-    fn from(options: &TtsOptions) -> Self {
-        Self { kana: options.kana }
-    }
-}
-
-#[derive(Default)]
-pub struct AccentPhrasesOptions {
-    pub kana: bool,
-}
-
-impl From<&TtsOptions> for AccentPhrasesOptions {
-    fn from(options: &TtsOptions) -> Self {
-        Self { kana: options.kana }
-    }
-}
-
-#[derive(Default, Debug, PartialEq, Eq)]
-pub enum AccelerationMode {
-    #[default]
-    Auto,
-    Cpu,
-    Gpu,
-}
-
-#[derive(Default)]
-pub struct InitializeOptions {
-    pub acceleration_mode: AccelerationMode,
-    pub cpu_num_threads: u16,
-    pub load_all_models: bool,
-    pub open_jtalk_dict_dir: Option<PathBuf>,
-}
-
-pub struct SynthesisOptions {
-    pub enable_interrogative_upspeak: bool,
-}
-
-impl From<&TtsOptions> for SynthesisOptions {
-    fn from(options: &TtsOptions) -> Self {
-        Self {
-            enable_interrogative_upspeak: options.enable_interrogative_upspeak,
-        }
-    }
-}
-
-pub struct TtsOptions {
-    pub kana: bool,
-    pub enable_interrogative_upspeak: bool,
-}
-
-impl Default for TtsOptions {
-    fn default() -> Self {
-        Self {
-            enable_interrogative_upspeak: true,
-            kana: Default::default(),
-        }
-    }
-}
-
-#[derive(new)]
-pub struct InferenceCore {
-    initialized: bool,
-    status_option: Option<Status>,
-}
-
-impl InferenceCore {
-    pub fn initialize(
-        &mut self,
-        use_gpu: bool,
-        cpu_num_threads: u16,
-        load_all_models: bool,
-    ) -> Result<()> {
-        self.initialized = false;
-        if !use_gpu || self.can_support_gpu_feature()? {
-            let mut status = Status::new(use_gpu, cpu_num_threads);
-
-            status.load_metas()?;
-
-            if load_all_models {
-                for model_index in 0..MODEL_FILE_SET.models_count() {
-                    status.load_model(model_index)?;
-                }
-            }
-
-            self.status_option = Some(status);
-            self.initialized = true;
-            Ok(())
-        } else {
-            Err(Error::GpuSupport)
-        }
-    }
-    fn can_support_gpu_feature(&self) -> Result<bool> {
-        let supported_devices = SupportedDevices::get_supported_devices()?;
-
-        cfg_if! {
-#[derive(new)]
-pub struct InferenceCore {
-    initialized: bool,
-    status_option: Option<Status>,
-}
-
-impl InferenceCore {
-    pub fn initialize(
-        &mut self,
-        use_gpu: bool,
-        cpu_num_threads: u16,
-        load_all_models: bool,
-    ) -> Result<()> {
-        self.initialized = false;
-        if !use_gpu || self.can_support_gpu_feature()? {
-            let mut status = Status::new(use_gpu, cpu_num_threads);
-
-            status.load_metas()?;
-
-            if load_all_models {
-                for model_index in 0..MODEL_FILE_SET.models_count() {
-                    status.load_model(model_index)?;
-                }
-            }
-
-            self.status_option = Some(status);
-            self.initialized = true;
-            Ok(())
-        } else {
-            Err(Error::GpuSupport)
-        }
-    }
-    fn can_support_gpu_feature(&self) -> Result<bool> {
-        let supported_devices = SupportedDevices::get_supported_devices()?;
-
-        cfg_if! {
-            if #[cfg(feature = "directml")]{
-                Ok(*supported_devices.dml())
-            } else{
-                Ok(*supported_devices.cuda())
-            }
-        }
-    }
-    pub fn load_model(&mut self, speaker_id: u32) -> Result<()> {
-        if self.initialized {
-            let status = self
-                .status_option
-                .as_mut()
-                .ok_or(Error::UninitializedStatus)?;
-            if let Some((model_index, _)) = get_model_index_and_speaker_id(speaker_id) {
-                status.load_model(model_index)
-            } else {
-                Err(Error::InvalidSpeakerId { speaker_id })
-            }
-        } else {
-            Err(Error::UninitializedStatus)
-        }
-    }
-    pub fn is_model_loaded(&self, speaker_id: u32) -> bool {
-        if let Some(status) = self.status_option.as_ref() {
-            if let Some((model_index, _)) = get_model_index_and_speaker_id(speaker_id) {
-                status.is_model_loaded(model_index)
-            } else {
-                false
-            }
-        } else {
-            false
-        }
-    }
-    pub fn finalize(&mut self) {
-        self.initialized = false;
-        self.status_option = None;
-    }
-
-    pub fn predict_duration(
-        &mut self,
-        phoneme_vector: &[i64],
-        speaker_id: u32,
-    ) -> Result<Vec<f32>> {
-        if !self.initialized {
-            return Err(Error::UninitializedStatus);
-        }
-
-        let status = self
-            .status_option
-            .as_mut()
-            .ok_or(Error::UninitializedStatus)?;
-
-        if !status.validate_speaker_id(speaker_id) {
-            return Err(Error::InvalidSpeakerId { speaker_id });
-        }
-
-        let (model_index, speaker_id) =
-            if let Some((model_index, speaker_id)) = get_model_index_and_speaker_id(speaker_id) {
-                (model_index, speaker_id)
-            } else {
-                return Err(Error::InvalidSpeakerId { speaker_id });
-            };
-
-        if model_index >= MODEL_FILE_SET.models_count() {
-            return Err(Error::InvalidModelIndex { model_index });
-        }
-
-        let mut phoneme_vector_array = NdArray::new(ndarray::arr1(phoneme_vector));
-        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[speaker_id as i64]));
-
-        let input_tensors: Vec<&mut dyn AnyArray> =
-            vec![&mut phoneme_vector_array, &mut speaker_id_array];
-
-        let mut output = status.predict_duration_session_run(model_index, input_tensors)?;
-
-        for output_item in output.iter_mut() {
-            if *output_item < PHONEME_LENGTH_MINIMAL {
-                *output_item = PHONEME_LENGTH_MINIMAL;
-            }
-        }
-
-        Ok(output)
-    }
-
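Each inference entry point repeats the same guard sequence: check initialization, validate the public speaker id, map it to a (model index, model-internal id) pair, then bounds-check the index. A hedged sketch of that sequence factored into one helper, using only names from the code above:

// Sketch only: consolidates the shared validation of predict_duration /
// predict_intonation / decode. Not how the crate actually factors it.
fn resolve_model(status: &Status, speaker_id: u32) -> Result<(usize, u32)> {
    if !status.validate_speaker_id(speaker_id) {
        return Err(Error::InvalidSpeakerId { speaker_id });
    }
    // Public speaker ids are remapped to a model index plus the id the
    // model itself was trained with.
    let (model_index, inner_id) = get_model_index_and_speaker_id(speaker_id)
        .ok_or(Error::InvalidSpeakerId { speaker_id })?;
    if model_index >= MODEL_FILE_SET.models_count() {
        return Err(Error::InvalidModelIndex { model_index });
    }
    Ok((model_index, inner_id))
}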
-    #[allow(clippy::too_many_arguments)]
-    pub fn predict_intonation(
-        &mut self,
-        length: usize,
-        vowel_phoneme_vector: &[i64],
-        consonant_phoneme_vector: &[i64],
-        start_accent_vector: &[i64],
-        end_accent_vector: &[i64],
-        start_accent_phrase_vector: &[i64],
-        end_accent_phrase_vector: &[i64],
-        speaker_id: u32,
-    ) -> Result<Vec<f32>> {
-        if !self.initialized {
-            return Err(Error::UninitializedStatus);
-        }
-
-        let status = self
-            .status_option
-            .as_mut()
-            .ok_or(Error::UninitializedStatus)?;
-
-        if !status.validate_speaker_id(speaker_id) {
-            return Err(Error::InvalidSpeakerId { speaker_id });
-        }
-
-        let (model_index, speaker_id) =
-            if let Some((model_index, speaker_id)) = get_model_index_and_speaker_id(speaker_id) {
-                (model_index, speaker_id)
-            } else {
-                return Err(Error::InvalidSpeakerId { speaker_id });
-            };
-
-        if model_index >= MODEL_FILE_SET.models_count() {
-            return Err(Error::InvalidModelIndex { model_index });
-        }
-
-        let mut length_array = NdArray::new(ndarray::arr0(length as i64));
-        let mut vowel_phoneme_vector_array = NdArray::new(ndarray::arr1(vowel_phoneme_vector));
-        let mut consonant_phoneme_vector_array =
-            NdArray::new(ndarray::arr1(consonant_phoneme_vector));
-        let mut start_accent_vector_array = NdArray::new(ndarray::arr1(start_accent_vector));
-        let mut end_accent_vector_array = NdArray::new(ndarray::arr1(end_accent_vector));
-        let mut start_accent_phrase_vector_array =
-            NdArray::new(ndarray::arr1(start_accent_phrase_vector));
-        let mut end_accent_phrase_vector_array =
-            NdArray::new(ndarray::arr1(end_accent_phrase_vector));
-        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[speaker_id as i64]));
-
-        let input_tensors: Vec<&mut dyn AnyArray> = vec![
-            &mut length_array,
-            &mut vowel_phoneme_vector_array,
-            &mut consonant_phoneme_vector_array,
-            &mut start_accent_vector_array,
-            &mut end_accent_vector_array,
-            &mut start_accent_phrase_vector_array,
-            &mut end_accent_phrase_vector_array,
-            &mut speaker_id_array,
-        ];
-
-        status.predict_intonation_session_run(model_index, input_tensors)
-    }
-
-    pub fn decode(
-        &mut self,
-        length: usize,
-        phoneme_size: usize,
-        f0: &[f32],
-        phoneme_vector: &[f32],
-        speaker_id: u32,
-    ) -> Result<Vec<f32>> {
-        if !self.initialized {
-            return Err(Error::UninitializedStatus);
-        }
-
-        let status = self
-            .status_option
-            .as_mut()
-            .ok_or(Error::UninitializedStatus)?;
-
-        if !status.validate_speaker_id(speaker_id) {
-            return Err(Error::InvalidSpeakerId { speaker_id });
-        }
-
-        let (model_index, speaker_id) =
-            if let Some((model_index, speaker_id)) = get_model_index_and_speaker_id(speaker_id) {
-                (model_index, speaker_id)
-            } else {
-                return Err(Error::InvalidSpeakerId { speaker_id });
-            };
-
-        if model_index >= MODEL_FILE_SET.models_count() {
-            return Err(Error::InvalidModelIndex { model_index });
-        }
-
-        // 音が途切れてしまうのを避けるworkaround処理が入っている
-        // TODO: 改善したらここのpadding処理を取り除く
-        const PADDING_SIZE: f64 = 0.4;
-        const DEFAULT_SAMPLING_RATE: f64 = 24000.0;
-        let padding_size = ((PADDING_SIZE * DEFAULT_SAMPLING_RATE) / 256.0).round() as usize;
-        let start_and_end_padding_size = 2 * padding_size;
-        let length_with_padding = length + start_and_end_padding_size;
-        let f0_with_padding = Self::make_f0_with_padding(f0, length_with_padding, padding_size);
-
-        let phoneme_with_padding = Self::make_phoneme_with_padding(
-            phoneme_vector,
-            phoneme_size,
-            length_with_padding,
-            padding_size,
-        );
-
-        let mut f0_array = NdArray::new(
-            ndarray::arr1(&f0_with_padding)
-                .into_shape([length_with_padding, 1])
-                .unwrap(),
-        );
-        let mut phoneme_array = NdArray::new(
-            ndarray::arr1(&phoneme_with_padding)
-                .into_shape([length_with_padding, phoneme_size])
-                .unwrap(),
-        );
-        let mut speaker_id_array = NdArray::new(ndarray::arr1(&[speaker_id as i64]));
-
-        let input_tensors: Vec<&mut dyn AnyArray> =
-            vec![&mut f0_array, &mut phoneme_array, &mut speaker_id_array];
-
-        status
-            .decode_session_run(model_index, input_tensors)
-            .map(|output| Self::trim_padding_from_output(output, padding_size))
-    }
-
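The padding arithmetic in `decode` is worth spelling out: 0.4 s of padding at a 24 kHz sampling rate with 256 output samples per frame gives (0.4 × 24000) / 256 = 37.5, which `round()` takes to 38 frames per side; after inference, 38 × 256 = 9728 samples are trimmed from each end. A self-contained check of those numbers:

// Worked example of the padding math used by `decode` above.
fn padding_frames() -> usize {
    const PADDING_SIZE: f64 = 0.4; // seconds of padding per side
    const DEFAULT_SAMPLING_RATE: f64 = 24000.0;
    // (0.4 * 24000) / 256 = 37.5 -> rounds to 38 frames
    ((PADDING_SIZE * DEFAULT_SAMPLING_RATE) / 256.0).round() as usize
}

fn main() {
    let frames = padding_frames();
    assert_eq!(frames, 38);
    // Each frame expands to 256 waveform samples, so trim_padding_from_output
    // drops 38 * 256 = 9728 samples from each end.
    assert_eq!(frames * 256, 9728);
}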
-    fn make_f0_with_padding(
-        f0_slice: &[f32],
-        length_with_padding: usize,
-        padding_size: usize,
-    ) -> Vec<f32> {
-        // 音が途切れてしまうのを避けるworkaround処理
-        // 改善したらこの関数を削除する
-        let mut f0_with_padding = Vec::with_capacity(length_with_padding);
-        let padding = vec![0.0; padding_size];
-        f0_with_padding.extend_from_slice(&padding);
-        f0_with_padding.extend_from_slice(f0_slice);
-        f0_with_padding.extend_from_slice(&padding);
-        f0_with_padding
-    }
-
-    fn make_phoneme_with_padding(
-        phoneme_slice: &[f32],
-        phoneme_size: usize,
-        length_with_padding: usize,
-        padding_size: usize,
-    ) -> Vec<f32> {
-        // 音が途切れてしまうのを避けるworkaround処理
-        // 改善したらこの関数を削除する
-        let mut padding_phoneme = vec![0.0; phoneme_size];
-        padding_phoneme[0] = 1.0;
-        let padding_phoneme_len = padding_phoneme.len();
-        let padding_phonemes: Vec<f32> = padding_phoneme
-            .into_iter()
-            .cycle()
-            .take(padding_phoneme_len * padding_size)
-            .collect();
-        let mut phoneme_with_padding = Vec::with_capacity(phoneme_size * length_with_padding);
-        phoneme_with_padding.extend_from_slice(&padding_phonemes);
-        phoneme_with_padding.extend_from_slice(phoneme_slice);
-        phoneme_with_padding.extend_from_slice(&padding_phonemes);
-
-        phoneme_with_padding
-    }
-
-    fn trim_padding_from_output(mut output: Vec<f32>, padding_f0_size: usize) -> Vec<f32> {
-        // 音が途切れてしまうのを避けるworkaround処理
-        // 改善したらこの関数を削除する
-        let padding_sampling_size = padding_f0_size * 256;
-        output
-            .drain(padding_sampling_size..output.len() - padding_sampling_size)
-            .collect()
-    }
-}
-
-pub static METAS: &Lazy<&str> = {
-    static METAS: Lazy<&str> = Lazy::new(|| &MODEL_FILE_SET.metas_str);
-    &METAS
-};
-
-pub static METAS_CSTRING: Lazy<CString> =
-    Lazy::new(|| CString::new(&*MODEL_FILE_SET.metas_str).unwrap());
-
-pub static SUPPORTED_DEVICES: Lazy<SupportedDevices> =
-    Lazy::new(|| SupportedDevices::get_supported_devices().unwrap());
-
-pub static SUPPORTED_DEVICES_CSTRING: Lazy<CString> =
-    Lazy::new(|| CString::new(SUPPORTED_DEVICES.to_json().to_string()).unwrap());
-
-fn get_model_index_and_speaker_id(speaker_id: u32) -> Option<(usize, u32)> {
-    MODEL_FILE_SET.speaker_id_map.get(&speaker_id).copied()
-}
-
-pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'static str {
-    // C APIのため、messageには必ず末尾にNULL文字を追加する
-    use VoicevoxResultCode::*;
-    match result_code {
-        VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => {
-            "OpenJTalkの辞書が読み込まれていません\0"
-        }
-        VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0",
-        VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0",
-
-        VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0",
-        VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR => {
-            "サポートされているデバイス情報取得中にエラーが発生しました\0"
-        }
-
-        VOICEVOX_RESULT_OK => "エラーが発生しませんでした\0",
-        VOICEVOX_RESULT_UNINITIALIZED_STATUS_ERROR => "Statusが初期化されていません\0",
-        VOICEVOX_RESULT_INVALID_SPEAKER_ID_ERROR => "無効なspeaker_idです\0",
-        VOICEVOX_RESULT_INVALID_MODEL_INDEX_ERROR => "無効なmodel_indexです\0",
-        VOICEVOX_RESULT_INFERENCE_ERROR => "推論に失敗しました\0",
-        VOICEVOX_RESULT_EXTRACT_FULL_CONTEXT_LABEL_ERROR => {
-            "入力テキストからのフルコンテキストラベル抽出に失敗しました\0"
-        }
-        VOICEVOX_RESULT_INVALID_UTF8_INPUT_ERROR => "入力テキストが無効なUTF-8データでした\0",
-        VOICEVOX_RESULT_PARSE_KANA_ERROR => {
-            "入力テキストをAquesTalkライクな読み仮名としてパースすることに失敗しました\0"
-        }
-        VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR => "無効なaudio_queryです\0",
-        VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR => "無効なaccent_phraseです\0",
-    }
-}
-
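The message table hard-codes a trailing `\0` on every string because a `const fn` cannot allocate a `CString`; callers hand the bytes straight across the FFI boundary. A hedged sketch of the kind of check a caller (or test) can apply before doing so:

use std::ffi::CStr;

// Sketch: confirm a message from error_result_to_message is a valid
// NUL-terminated C string (exactly one NUL, at the end).
fn as_c_str(message: &'static str) -> &'static CStr {
    CStr::from_bytes_with_nul(message.as_bytes())
        .expect("every message must end with exactly one NUL byte")
}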
-#[cfg(windows)]
-fn list_windows_video_cards() {
-    use std::{ffi::OsString, os::windows::ffi::OsStringExt as _};
-
-    use humansize::BINARY;
-    use tracing::{error, info};
-    use windows::Win32::Graphics::Dxgi::{
-        CreateDXGIFactory, IDXGIFactory, DXGI_ADAPTER_DESC, DXGI_ERROR_NOT_FOUND,
-    };
-
-    info!("検出されたGPU (DirectMLには1番目のGPUが使われます):");
-    match list_windows_video_cards() {
-        Ok(descs) => {
-            for desc in descs {
-                let description = OsString::from_wide(trim_nul(&desc.Description));
-                let vram = humansize::format_size(desc.DedicatedVideoMemory, BINARY);
-                info!("  - {description:?} ({vram})");
-            }
-        }
-        Err(err) => error!("{err}"),
-    }
-
-    fn list_windows_video_cards() -> windows::core::Result<Vec<DXGI_ADAPTER_DESC>> {
-        #[allow(unsafe_code)]
-        unsafe {
-            let factory = CreateDXGIFactory::<IDXGIFactory>()?;
-            (0..)
-                .map(|i| factory.EnumAdapters(i)?.GetDesc())
-                .take_while(|r| !matches!(r, Err(e) if e.code() == DXGI_ERROR_NOT_FOUND))
-                .collect()
-        }
-    }
-
-    fn trim_nul(s: &[u16]) -> &[u16] {
-        &s[..s.iter().position(|&c| c == 0x0000).unwrap_or(s.len())]
-    }
-}
-
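DXGI's `EnumAdapters` is index-based and signals exhaustion with `DXGI_ERROR_NOT_FOUND`, which is why the code above iterates an unbounded `(0..)` and cuts the stream with `take_while`; the terminating error is dropped while any other error still fails the `collect`. A platform-neutral sketch of the same pattern, with a hypothetical `fetch` standing in for `factory.EnumAdapters`:

// Sketch of the "enumerate by index until a sentinel error" pattern.
// `fetch` is hypothetical; DXGI_ERROR_NOT_FOUND plays the role of `not_found`.
fn collect_all<T, E: PartialEq>(
    fetch: impl Fn(u32) -> Result<T, E>,
    not_found: E,
) -> Result<Vec<T>, E> {
    (0u32..)
        .map(|i| fetch(i))
        // Stop at the sentinel error without yielding it; any other Err
        // passes through and aborts the collect below.
        .take_while(|r| !matches!(r, Err(e) if *e == not_found))
        .collect()
}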
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::macros::tests::assert_debug_fmt_eq;
-    use pretty_assertions::assert_eq;
-    use test_util::OPEN_JTALK_DIC_DIR;
-
-    #[rstest]
-    fn finalize_works() {
-        let internal = VoicevoxCore::new_with_mutex();
-        let result = internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions::default());
-        assert_debug_fmt_eq!(Ok(()), result);
-        internal.lock().unwrap().finalize();
-        assert_eq!(
-            false,
-            internal
-                .lock()
-                .unwrap()
-                .synthesis_engine
-                .inference_core()
-                .initialized
-        );
-        assert_eq!(
-            true,
-            internal
-                .lock()
-                .unwrap()
-                .synthesis_engine
-                .inference_core()
-                .status_option
-                .is_none()
-        );
-    }
-
-    #[rstest]
-    #[case(0, Err(Error::UninitializedStatus), Ok(()))]
-    #[case(1, Err(Error::UninitializedStatus), Ok(()))]
-    #[case(999, Err(Error::UninitializedStatus), Err(Error::InvalidSpeakerId{speaker_id:999}))]
-    fn load_model_works(
-        #[case] speaker_id: u32,
-        #[case] expected_result_at_uninitialized: Result<()>,
-        #[case] expected_result_at_initialized: Result<()>,
-    ) {
-        let internal = VoicevoxCore::new_with_mutex();
-        let result = internal.lock().unwrap().load_model(speaker_id);
-        assert_debug_fmt_eq!(expected_result_at_uninitialized, result);
-
-        internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                ..Default::default()
-            })
-            .unwrap();
-        let result = internal.lock().unwrap().load_model(speaker_id);
-        assert_debug_fmt_eq!(
-            expected_result_at_initialized,
-            result,
-            "got load_model result",
-        );
-    }
-
-    #[rstest]
-    fn is_use_gpu_works() {
-        let internal = VoicevoxCore::new_with_mutex();
-        assert_eq!(false, internal.lock().unwrap().is_gpu_mode());
-        internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                ..Default::default()
-            })
-            .unwrap();
-        assert_eq!(false, internal.lock().unwrap().is_gpu_mode());
-    }
-
-    #[rstest]
-    #[case(0, true)]
-    #[case(1, true)]
-    #[case(999, false)]
-    fn is_model_loaded_works(#[case] speaker_id: u32, #[case] expected: bool) {
-        let internal = VoicevoxCore::new_with_mutex();
-        assert!(
-            !internal.lock().unwrap().is_model_loaded(speaker_id),
-            "expected is_model_loaded to return false, but got true",
-        );
-
-        internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                ..Default::default()
-            })
-            .unwrap();
-        assert!(
-            !internal.lock().unwrap().is_model_loaded(speaker_id),
-            "expected is_model_loaded to return false, but got true",
-        );
-
-        internal
-            .lock()
-            .unwrap()
-            .load_model(speaker_id)
-            .unwrap_or(());
-        assert_eq!(
-            internal.lock().unwrap().is_model_loaded(speaker_id),
-            expected,
-            "expected is_model_loaded return value against speaker_id `{}` is `{}`, but got `{}`",
-            speaker_id,
-            expected,
-            !expected
-        );
-    }
-
-    #[rstest]
-    fn supported_devices_works() {
-        let internal = VoicevoxCore::new_with_mutex();
-        let cstr_result = internal.lock().unwrap().get_supported_devices_json();
-        assert!(cstr_result.to_str().is_ok(), "{cstr_result:?}");
-
-        let json_result: std::result::Result<SupportedDevices, _> =
-            serde_json::from_str(cstr_result.to_str().unwrap());
-        assert!(json_result.is_ok(), "{json_result:?}");
-    }
-
-    #[rstest]
-    #[case(0, Some((0,0)))]
-    #[case(1, Some((0,1)))]
-    #[case(999, None)]
-    fn get_model_index_and_speaker_id_works(
-        #[case] speaker_id: u32,
-        #[case] expected: Option<(usize, u32)>,
-    ) {
-        let actual = get_model_index_and_speaker_id(speaker_id);
-        assert_eq!(expected, actual);
-    }
-
-    #[rstest]
-    fn predict_duration_works() {
-        let internal = VoicevoxCore::new_with_mutex();
-        internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                load_all_models: true,
-                acceleration_mode: AccelerationMode::Cpu,
-                ..Default::default()
-            })
-            .unwrap();
-
-        // 「こんにちは、音声合成の世界へようこそ」という文章を変換して得た phoneme_vector
-        let phoneme_vector = [
-            0, 23, 30, 4, 28, 21, 10, 21, 42, 7, 0, 30, 4, 35, 14, 14, 16, 30, 30, 35, 14, 14, 28,
-            30, 35, 14, 23, 7, 21, 14, 43, 30, 30, 23, 30, 35, 30, 0,
-        ];
-
-        let result = internal
-            .lock()
-            .unwrap()
-            .predict_duration(&phoneme_vector, 0);
-
-        assert!(result.is_ok(), "{result:?}");
-        assert_eq!(result.unwrap().len(), phoneme_vector.len());
-    }
-
-    #[rstest]
-    fn predict_intonation_works() {
-        let internal = VoicevoxCore::new_with_mutex();
-        internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                load_all_models: true,
-                acceleration_mode: AccelerationMode::Cpu,
-                ..Default::default()
-            })
-            .unwrap();
-
-        // 「テスト」という文章に対応する入力
-        let vowel_phoneme_vector = [0, 14, 6, 30, 0];
-        let consonant_phoneme_vector = [-1, 37, 35, 37, -1];
-        let start_accent_vector = [0, 1, 0, 0, 0];
-        let end_accent_vector = [0, 1, 0, 0, 0];
-        let start_accent_phrase_vector = [0, 1, 0, 0, 0];
-        let end_accent_phrase_vector = [0, 0, 0, 1, 0];
-
-        let result = internal.lock().unwrap().predict_intonation(
-            vowel_phoneme_vector.len(),
-            &vowel_phoneme_vector,
-            &consonant_phoneme_vector,
-            &start_accent_vector,
-            &end_accent_vector,
-            &start_accent_phrase_vector,
-            &end_accent_phrase_vector,
-            0,
-        );
-
-        assert!(result.is_ok(), "{result:?}");
-        assert_eq!(result.unwrap().len(), vowel_phoneme_vector.len());
-    }
-
-    #[rstest]
-    fn decode_works() {
-        let internal = VoicevoxCore::new_with_mutex();
-        internal
-            .lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                load_all_models: true,
-                ..Default::default()
-            })
-            .unwrap();
-
-        // 「テスト」という文章に対応する入力
-        const F0_LENGTH: usize = 69;
-        let mut f0 = [0.; F0_LENGTH];
-        f0[9..24].fill(5.905218);
-        f0[37..60].fill(5.565851);
-
-        const PHONEME_SIZE: usize = 45;
-        let mut phoneme = [0.; PHONEME_SIZE * F0_LENGTH];
-        let mut set_one = |index, range| {
-            for i in range {
-                phoneme[i * PHONEME_SIZE + index] = 1.;
-            }
-        };
-        set_one(0, 0..9);
-        set_one(37, 9..13);
-        set_one(14, 13..24);
-        set_one(35, 24..30);
-        set_one(6, 30..37);
-        set_one(37, 37..45);
-        set_one(30, 45..60);
-        set_one(0, 60..69);
-
-        let result = internal
-            .lock()
-            .unwrap()
-            .decode(F0_LENGTH, PHONEME_SIZE, &f0, &phoneme, 0);
-
-        assert!(result.is_ok(), "{result:?}");
-        assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
-    }
-
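`decode_works` builds its phoneme input as a flattened, row-major [length, phoneme_size] one-hot matrix: frame `i` with active class `j` sets index `i * PHONEME_SIZE + j`. A small generalized sketch of that layout:

// Sketch of the one-hot layout assembled by decode_works above:
// one active phoneme class per frame, flattened row-major.
fn one_hot(length: usize, phoneme_size: usize, classes: &[usize]) -> Vec<f32> {
    assert_eq!(classes.len(), length);
    let mut m = vec![0.0; length * phoneme_size];
    for (frame, &class) in classes.iter().enumerate() {
        m[frame * phoneme_size + class] = 1.0;
    }
    m
}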
-    type TextConsonantVowelData =
-        [(&'static [(&'static str, &'static str, &'static str)], usize)];
-
-    // [([(テキスト, 子音, 母音), ...], アクセントの位置), ...] の形式
-    const TEXT_CONSONANT_VOWEL_DATA1: &TextConsonantVowelData = &[
-        (&[("コ", "k", "o"), ("レ", "r", "e"), ("ワ", "w", "a")], 3),
-        (
-            &[
-                ("テ", "t", "e"),
-                ("ス", "s", "U"),
-                ("ト", "t", "o"),
-                ("デ", "d", "e"),
-                ("ス", "s", "U"),
-            ],
-            1,
-        ),
-    ];
-
-    const TEXT_CONSONANT_VOWEL_DATA2: &TextConsonantVowelData = &[
-        (&[("コ", "k", "o"), ("レ", "r", "e"), ("ワ", "w", "a")], 1),
-        (
-            &[
-                ("テ", "t", "e"),
-                ("ス", "s", "U"),
-                ("ト", "t", "o"),
-                ("デ", "d", "e"),
-                ("ス", "s", "U"),
-            ],
-            3,
-        ),
-    ];
-
-    #[rstest]
-    #[case(
-        "これはテストです",
-        false,
-        TEXT_CONSONANT_VOWEL_DATA1,
-        "コレワ'/テ'_ストデ_ス"
-    )]
-    #[case(
-        "コ'レワ/テ_スト'デ_ス",
-        true,
-        TEXT_CONSONANT_VOWEL_DATA2,
-        "コ'レワ/テ_スト'デ_ス"
-    )]
-    fn audio_query_works(
-        #[case] input_text: &str,
-        #[case] input_kana_option: bool,
-        #[case] expected_text_consonant_vowel_data: &TextConsonantVowelData,
-        #[case] expected_kana_text: &str,
-    ) {
-        let core = VoicevoxCore::new_with_mutex();
-        core.lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                load_all_models: true,
-                open_jtalk_dict_dir: Some(OPEN_JTALK_DIC_DIR.into()),
-                ..Default::default()
-            })
-            .unwrap();
-
-        let query = core
-            .lock()
-            .unwrap()
-            .audio_query(
-                input_text,
-                0,
-                AudioQueryOptions {
-                    kana: input_kana_option,
-                },
-            )
-            .unwrap();
-
-        assert_eq!(
-            query.accent_phrases().len(),
-            expected_text_consonant_vowel_data.len()
-        );
-
-        for (accent_phrase, (text_consonant_vowel_slice, accent_pos)) in
-            std::iter::zip(query.accent_phrases(), expected_text_consonant_vowel_data)
-        {
-            assert_eq!(
-                accent_phrase.moras().len(),
-                text_consonant_vowel_slice.len()
-            );
-            assert_eq!(accent_phrase.accent(), accent_pos);
-
-            for (mora, (text, consonant, vowel)) in
-                std::iter::zip(accent_phrase.moras(), *text_consonant_vowel_slice)
-            {
-                assert_eq!(mora.text(), text);
-                // NOTE: 子音の長さが必ず非ゼロになるテストケースを想定している
-                assert_ne!(
-                    mora.consonant_length(),
-                    &Some(0.),
-                    "expected mora.consonant_length is not Some(0.0), but got Some(0.0)."
-                );
-                assert_eq!(mora.consonant(), &Some(consonant.to_string()));
-                assert_eq!(mora.vowel(), vowel);
-                // NOTE: 母音の長さが必ず非ゼロになるテストケースを想定している
-                assert_ne!(
-                    mora.vowel_length(),
-                    &0.,
-                    "expected mora.vowel_length is not 0.0, but got 0.0."
-                );
-            }
-        }
-
-        assert_eq!(query.kana().as_deref(), Some(expected_kana_text));
-    }
-
-    #[rstest]
-    #[case("これはテストです", false, TEXT_CONSONANT_VOWEL_DATA1)]
-    #[case("コ'レワ/テ_スト'デ_ス", true, TEXT_CONSONANT_VOWEL_DATA2)]
-    fn accent_phrases_works(
-        #[case] input_text: &str,
-        #[case] input_kana_option: bool,
-        #[case] expected_text_consonant_vowel_data: &TextConsonantVowelData,
-    ) {
-        let core = VoicevoxCore::new_with_mutex();
-        core.lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                load_all_models: true,
-                open_jtalk_dict_dir: Some(OPEN_JTALK_DIC_DIR.into()),
-                ..Default::default()
-            })
-            .unwrap();
-
-        let accent_phrases = core
-            .lock()
-            .unwrap()
-            .accent_phrases(
-                input_text,
-                0,
-                AccentPhrasesOptions {
-                    kana: input_kana_option,
-                },
-            )
-            .unwrap();
-
-        assert_eq!(
-            accent_phrases.len(),
-            expected_text_consonant_vowel_data.len()
-        );
-
-        for (accent_phrase, (text_consonant_vowel_slice, accent_pos)) in
-            std::iter::zip(accent_phrases, expected_text_consonant_vowel_data)
-        {
-            assert_eq!(
-                accent_phrase.moras().len(),
-                text_consonant_vowel_slice.len()
-            );
-            assert_eq!(accent_phrase.accent(), accent_pos);
-
-            for (mora, (text, consonant, vowel)) in
-                std::iter::zip(accent_phrase.moras(), *text_consonant_vowel_slice)
-            {
-                assert_eq!(mora.text(), text);
-                // NOTE: 子音の長さが必ず非ゼロになるテストケースを想定している
-                assert_ne!(
-                    mora.consonant_length(),
-                    &Some(0.),
-                    "expected mora.consonant_length is not Some(0.0), but got Some(0.0)."
-                );
-                assert_eq!(mora.consonant(), &Some(consonant.to_string()));
-                assert_eq!(mora.vowel(), vowel);
-                // NOTE: 母音の長さが必ず非ゼロになるテストケースを想定している
-                assert_ne!(
-                    mora.vowel_length(),
-                    &0.,
-                    "expected mora.vowel_length is not 0.0, but got 0.0."
-                );
-            }
-        }
-    }
-
-    #[rstest]
-    fn mora_length_works() {
-        let core = VoicevoxCore::new_with_mutex();
-        core.lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                load_all_models: true,
-                open_jtalk_dict_dir: Some(OPEN_JTALK_DIC_DIR.into()),
-                ..Default::default()
-            })
-            .unwrap();
-
-        let accent_phrases = core
-            .lock()
-            .unwrap()
-            .accent_phrases("これはテストです", 0, AccentPhrasesOptions { kana: false })
-            .unwrap();
-
-        let modified_accent_phrases = core
-            .lock()
-            .unwrap()
-            .mora_length(1, &accent_phrases)
-            .unwrap();
-
-        // NOTE: 一つでも母音の長さが変わっていれば、動作しているとみなす
-        assert!(
-            any_mora_param_changed(
-                &accent_phrases,
-                &modified_accent_phrases,
-                MoraModel::vowel_length
-            ),
-            "mora_length() does not work: mora.vowel_length() is not changed."
-        );
-    }
-
-    #[rstest]
-    fn mora_pitch_works() {
-        let core = VoicevoxCore::new_with_mutex();
-        core.lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                load_all_models: true,
-                open_jtalk_dict_dir: Some(OPEN_JTALK_DIC_DIR.into()),
-                ..Default::default()
-            })
-            .unwrap();
-
-        let accent_phrases = core
-            .lock()
-            .unwrap()
-            .accent_phrases("これはテストです", 0, AccentPhrasesOptions { kana: false })
-            .unwrap();
-
-        let modified_accent_phrases = core.lock().unwrap().mora_pitch(1, &accent_phrases).unwrap();
-
-        // NOTE: 一つでも音高が変わっていれば、動作しているとみなす
-        assert!(
-            any_mora_param_changed(&accent_phrases, &modified_accent_phrases, MoraModel::pitch),
-            "mora_pitch() does not work: mora.pitch() is not changed."
-        );
-    }
-
-    #[rstest]
-    fn mora_data_works() {
-        let core = VoicevoxCore::new_with_mutex();
-        core.lock()
-            .unwrap()
-            .initialize(InitializeOptions {
-                acceleration_mode: AccelerationMode::Cpu,
-                load_all_models: true,
-                open_jtalk_dict_dir: Some(OPEN_JTALK_DIC_DIR.into()),
-                ..Default::default()
-            })
-            .unwrap();
-
-        let accent_phrases = core
-            .lock()
-            .unwrap()
-            .accent_phrases("これはテストです", 0, AccentPhrasesOptions { kana: false })
-            .unwrap();
-
-        let modified_accent_phrases = core.lock().unwrap().mora_data(1, &accent_phrases).unwrap();
-
-        // NOTE: 一つでも音高が変わっていれば、動作しているとみなす
-        assert!(
-            any_mora_param_changed(&accent_phrases, &modified_accent_phrases, MoraModel::pitch),
-            "mora_data() does not work: mora.pitch() is not changed."
-        );
-        // NOTE: 一つでも母音の長さが変わっていれば、動作しているとみなす
-        assert!(
-            any_mora_param_changed(
-                &accent_phrases,
-                &modified_accent_phrases,
-                MoraModel::vowel_length
-            ),
-            "mora_data() does not work: mora.vowel_length() is not changed."
-        );
-    }
-
-    fn any_mora_param_changed<T: PartialEq>(
-        before: &[AccentPhraseModel],
-        after: &[AccentPhraseModel],
-        param: fn(&MoraModel) -> &T,
-    ) -> bool {
-        std::iter::zip(before, after)
-            .flat_map(move |(before, after)| std::iter::zip(before.moras(), after.moras()))
-            .any(|(before, after)| param(before) != param(after))
-    }
-
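The helper above works because method references coerce to plain `fn(&MoraModel) -> &T` pointers, so each test can pass an accessor for the field it cares about. A toy illustration of the same pattern (the `Mora` type here is hypothetical, not the crate's `MoraModel`):

// Toy version of the zip-and-compare pattern used by any_mora_param_changed.
struct Mora {
    pitch: f32,
}

impl Mora {
    fn pitch(&self) -> &f32 {
        &self.pitch
    }
}

fn main() {
    let before = [Mora { pitch: 5.5 }];
    let after = [Mora { pitch: 5.9 }];
    // A method reference coerces to `fn(&Mora) -> &f32`.
    let param: fn(&Mora) -> &f32 = Mora::pitch;
    let changed = std::iter::zip(&before, &after).any(|(b, a)| param(b) != param(a));
    assert!(changed);
}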
-    #[rstest]
-    fn get_version_works() {
-        assert_eq!("0.0.0", VoicevoxCore::get_version());
-    }
-}
diff --git a/crates/voicevox_core/src/result_code.rs b/crates/voicevox_core/src/result_code.rs
index 45816e468..aac908295 100644
--- a/crates/voicevox_core/src/result_code.rs
+++ b/crates/voicevox_core/src/result_code.rs
@@ -17,22 +17,65 @@ pub enum VoicevoxResultCode {
     VOICEVOX_RESULT_GPU_SUPPORT_ERROR = 4,
     /// メタ情報読み込みに失敗した
     VOICEVOX_RESULT_LOAD_METAS_ERROR = 5,
-    /// ステータスが初期化されていない
-    VOICEVOX_RESULT_UNINITIALIZED_STATUS_ERROR = 6,
-    /// 無効なspeaker_idが指定された
-    VOICEVOX_RESULT_INVALID_SPEAKER_ID_ERROR = 7,
+    /// 無効なstyle_idが指定された
+    VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR = 6,
     /// 無効なmodel_indexが指定された
-    VOICEVOX_RESULT_INVALID_MODEL_INDEX_ERROR = 8,
+    VOICEVOX_RESULT_INVALID_MODEL_INDEX_ERROR = 7,
     /// 推論に失敗した
-    VOICEVOX_RESULT_INFERENCE_ERROR = 9,
+    VOICEVOX_RESULT_INFERENCE_ERROR = 8,
     /// コンテキストラベル出力に失敗した
-    VOICEVOX_RESULT_EXTRACT_FULL_CONTEXT_LABEL_ERROR = 10,
+    VOICEVOX_RESULT_EXTRACT_FULL_CONTEXT_LABEL_ERROR = 11,
     /// 無効なutf8文字列が入力された
-    VOICEVOX_RESULT_INVALID_UTF8_INPUT_ERROR = 11,
+    VOICEVOX_RESULT_INVALID_UTF8_INPUT_ERROR = 12,
     /// aquestalk形式のテキストの解析に失敗した
-    VOICEVOX_RESULT_PARSE_KANA_ERROR = 12,
+    VOICEVOX_RESULT_PARSE_KANA_ERROR = 13,
     /// 無効なAudioQuery
-    VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR = 13,
+    VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR = 14,
     /// 無効なAccentPhrase
-    VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR = 14,
+    VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR = 15,
+    /// ファイルオープンエラー
+    VOICEVOX_OPEN_FILE_ERROR = 16,
+    /// Modelを読み込めなかった
+    VOICEVOX_VVM_MODEL_READ_ERROR = 17,
+    /// すでに読み込まれているModelを読み込もうとした
+    VOICEVOX_ALREADY_LOADED_MODEL_ERROR = 18,
+    /// Modelが読み込まれていない
+    VOICEVOX_UNLOADED_MODEL_ERROR = 19,
+}
+
+pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'static str {
+    // C APIのため、messageには必ず末尾にNULL文字を追加する
+    use VoicevoxResultCode::*;
+    match result_code {
+        VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => {
+            "OpenJTalkの辞書が読み込まれていません\0"
+        }
+        VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0",
+        VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0",
+
+        VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0",
+        VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR => {
+            "サポートされているデバイス情報取得中にエラーが発生しました\0"
+        }
+
+        VOICEVOX_RESULT_OK => "エラーが発生しませんでした\0",
+        VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR => "無効なspeaker_idです\0",
+        VOICEVOX_RESULT_INVALID_MODEL_INDEX_ERROR => "無効なmodel_indexです\0",
+        VOICEVOX_RESULT_INFERENCE_ERROR => "推論に失敗しました\0",
+        VOICEVOX_RESULT_EXTRACT_FULL_CONTEXT_LABEL_ERROR => {
+            "入力テキストからのフルコンテキストラベル抽出に失敗しました\0"
+        }
+        VOICEVOX_RESULT_INVALID_UTF8_INPUT_ERROR => "入力テキストが無効なUTF-8データでした\0",
+        VOICEVOX_RESULT_PARSE_KANA_ERROR => {
+            "入力テキストをAquesTalkライクな読み仮名としてパースすることに失敗しました\0"
+        }
+        VOICEVOX_RESULT_INVALID_AUDIO_QUERY_ERROR => "無効なaudio_queryです\0",
+        VOICEVOX_RESULT_INVALID_ACCENT_PHRASE_ERROR => "無効なaccent_phraseです\0",
+        VOICEVOX_OPEN_FILE_ERROR => "ファイルオープンに失敗しました\0",
+        VOICEVOX_VVM_MODEL_READ_ERROR => "Modelを読み込めませんでした\0",
+        VOICEVOX_ALREADY_LOADED_MODEL_ERROR => {
+            "すでに読み込まれているModelを読み込もうとしました\0"
+        }
+        VOICEVOX_UNLOADED_MODEL_ERROR => "Modelが読み込まれていません\0",
+    }
+}
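With the codes renumbered, a C API shim still needs to translate the crate's internal error type into a `VoicevoxResultCode`. A hedged sketch of that mapping; the `Error` variant names here are assumed from this diff, and the catch-all arm is for brevity only:

// Sketch of an internal-error-to-result-code mapping (not the crate's
// verbatim implementation; variant names assumed from the surrounding diff).
fn to_result_code(err: &Error) -> VoicevoxResultCode {
    use VoicevoxResultCode::*;
    match err {
        Error::NotLoadedOpenjtalkDict => VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR,
        Error::GpuSupport => VOICEVOX_RESULT_GPU_SUPPORT_ERROR,
        Error::InvalidStyleId { .. } => VOICEVOX_RESULT_INVALID_STYLE_ID_ERROR,
        _ => VOICEVOX_RESULT_INFERENCE_ERROR, // catch-all for brevity
    }
}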
diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs
index 4c4704bd2..68f839a6b 100644
--- a/crates/voicevox_core/src/status.rs
+++ b/crates/voicevox_core/src/status.rs
@@ -1,16 +1,12 @@
 use super::*;
-use anyhow::Context as _;
 use once_cell::sync::Lazy;
 use onnxruntime::{
     environment::Environment,
     session::{AnyArray, Session},
     GraphOptimizationLevel, LoggingLevel,
 };
-use serde::{Deserialize, Serialize};
-use std::{
-    env,
-    path::{Path, PathBuf},
-};
+use std::sync::Mutex;
+use std::{env, path::Path};
 use tracing::error;
 
 mod model_file;
@@ -20,27 +16,21 @@ cfg_if! {
         use onnxruntime::CudaProviderOptions;
     }
 }
-use std::collections::{BTreeMap, BTreeSet};
-
-pub(crate) static MODEL_FILE_SET: Lazy<ModelFileSet> = Lazy::new(|| {
-    let result = ModelFileSet::new();
-    if let Err(err) = &result {
-        error!("ファイルを読み込めなかったためクラッシュします: {err}");
-    }
-    result.unwrap()
-});
+use std::collections::BTreeMap;
 
 pub struct Status {
     models: StatusModels,
+    merged_metas: VoiceModelMeta,
     light_session_options: SessionOptions, // 軽いモデルはこちらを使う
     heavy_session_options: SessionOptions, // 重いモデルはこちらを使う
-    supported_styles: BTreeSet<u32>,
+    id_relations: BTreeMap<StyleId, VoiceModelId>,
 }
 
 struct StatusModels {
-    predict_duration: BTreeMap<usize, Session<'static>>,
-    predict_intonation: BTreeMap<usize, Session<'static>>,
-    decode: BTreeMap<usize, Session<'static>>,
+    metas: BTreeMap<VoiceModelId, VoiceModelMeta>,
+    predict_duration: BTreeMap<VoiceModelId, Mutex<Session<'static>>>,
+    predict_intonation: BTreeMap<VoiceModelId, Mutex<Session<'static>>>,
+    decode: BTreeMap<VoiceModelId, Mutex<Session<'static>>>,
 }
 
 #[derive(new, Getters)]
@@ -49,107 +39,10 @@ struct SessionOptions {
     use_gpu: bool,
 }
 
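Note the new `use std::sync::Mutex` paired with the `Mutex<Session<'static>>` values: wrapping each ONNX session in its own lock lets callers share a `&Status` across threads while still getting the exclusive access a session run needs. A minimal sketch of the access pattern this layout enables; `Session` and `run` are hypothetical stand-ins here, not the onnxruntime API:

use std::collections::BTreeMap;
use std::sync::Mutex;

// Hypothetical stand-ins for the real session type.
struct Session;
impl Session {
    fn run(&mut self, _inputs: &[f32]) -> Vec<f32> {
        vec![]
    }
}

struct Models {
    decode: BTreeMap<String, Mutex<Session>>,
}

impl Models {
    // &self suffices: the per-session Mutex supplies the mutable access
    // that a run requires, one model at a time.
    fn decode(&self, model_id: &str, inputs: &[f32]) -> Option<Vec<f32>> {
        let mut session = self.decode.get(model_id)?.lock().ok()?;
        Some(session.run(inputs))
    }
}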
-pub(crate) struct ModelFileSet {
-    pub(crate) speaker_id_map: BTreeMap<u32, (usize, u32)>,
-    pub(crate) metas_str: String,
-    models: Vec<Model>,
-}
-
-impl ModelFileSet {
-    fn new() -> anyhow::Result<Self> {
-        let path = {
-            let root_dir = if cfg!(test) {
-                Path::new(env!("CARGO_WORKSPACE_DIR")).join("model")
-            } else if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) {
-                root_dir.into()
-            } else {
-                process_path::get_dylib_path()
-                    .or_else(process_path::get_executable_path)
-                    .with_context(|| "Could not get the current dynamic library/executable path")?
-                    .parent()
-                    .unwrap_or_else(|| "".as_ref())
-                    .join("model")
-            };
-
-            move |rel_path| root_dir.join(rel_path)
-        };
-
-        let metas_str = fs_err::read_to_string(path("metas.json"))?;
-
-        let models = model_file::MODEL_FILE_NAMES
-            .iter()
-            .map(
-                |&ModelFileNames {
-                     predict_duration_model,
-                     predict_intonation_model,
-                     decode_model,
-                 }| {
-                    let predict_duration_model = ModelFile::new(&path(predict_duration_model))?;
-                    let predict_intonation_model = ModelFile::new(&path(predict_intonation_model))?;
-                    let decode_model = ModelFile::new(&path(decode_model))?;
-                    Ok(Model {
-                        predict_duration_model,
-                        predict_intonation_model,
-                        decode_model,
-                    })
-                },
-            )
-            .collect::<anyhow::Result<_>>()?;
-
-        return Ok(Self {
-            speaker_id_map: model_file::SPEAKER_ID_MAP.iter().copied().collect(),
-            metas_str,
-            models,
-        });
-
-        const ROOT_DIR_ENV_NAME: &str = "VV_MODELS_ROOT_DIR";
-    }
-
-    pub(crate) fn models_count(&self) -> usize {
-        self.models.len()
-    }
-}
-
-struct ModelFileNames {
-    predict_duration_model: &'static str,
-    predict_intonation_model: &'static str,
-    decode_model: &'static str,
-}
-
 #[derive(thiserror::Error, Debug)]
 #[error("不正なモデルファイルです")]
 struct DecryptModelError;
 
-struct Model {
-    predict_duration_model: ModelFile,
-    predict_intonation_model: ModelFile,
-    decode_model: ModelFile,
-}
-
-struct ModelFile {
-    path: PathBuf,
-    content: Vec<u8>,
-}
-
-impl ModelFile {
-    fn new(path: &Path) -> anyhow::Result<Self> {
-        let content = fs_err::read(path)?;
-        Ok(Self {
-            path: path.to_owned(),
-            content,
-        })
-    }
-}
-
-#[derive(Deserialize, Getters)]
-struct Meta {
-    styles: Vec