From e976900113fffbb3565fb768c0e1a7382a476917 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 25 Mar 2024 20:31:53 +0800 Subject: [PATCH] Support including TTS conditionally. --- .github/workflows/linux.yaml | 14 ++-- .github/workflows/macos.yaml | 12 +++- .github/workflows/windows-x64.yaml | 15 +++- .github/workflows/windows-x86.yaml | 15 +++- CMakeLists.txt | 34 +++++++-- c-api-examples/CMakeLists.txt | 6 +- sherpa-onnx/c-api/c-api.cc | 7 +- sherpa-onnx/csrc/CMakeLists.txt | 97 +++++++++++++++++--------- sherpa-onnx/jni/jni.cc | 29 +++++--- sherpa-onnx/python/csrc/CMakeLists.txt | 11 ++- sherpa-onnx/python/csrc/sherpa-onnx.cc | 8 ++- 11 files changed, 178 insertions(+), 70 deletions(-) diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index b1cf6ed91..aa7689509 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -53,6 +53,7 @@ jobs: os: [ubuntu-latest] build_type: [Release, Debug] shared_lib: [ON, OFF] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -93,7 +94,7 @@ jobs: mkdir build cd build - cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. + cmake -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. 
make -j2 make install @@ -120,7 +121,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }} + name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: build/bin/* - name: Test spoken language identification (C API) @@ -175,6 +176,7 @@ jobs: .github/scripts/test-offline-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin:$PATH @@ -226,7 +228,11 @@ jobs: suffix=static fi - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix + else + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix-no-tts + fi mkdir $dst cp -a build/install/bin $dst/ @@ -278,6 +284,6 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }} + name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: tts diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index a80456279..6474de36a 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -52,6 +52,7 @@ jobs: os: [macos-latest] build_type: [Release, Debug] lib_type: [static, shared] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -61,7 +62,7 @@ jobs: - name: ccache uses: hendrikmuhs/ccache-action@v1.2 with: - key: ${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.lib_type }} + key: ${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.lib_type }}-tts-${{ matrix.with_tts }} - name: Configure CMake shell: bash @@ -79,7 +80,7 @@ jobs: BUILD_SHARED_LIBS=ON fi - cmake -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_INSTALL_PREFIX=./install .. 
+ cmake -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_INSTALL_PREFIX=./install .. - name: Build sherpa-onnx for macos shell: bash @@ -135,6 +136,7 @@ jobs: .github/scripts/test-online-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin:$PATH @@ -196,7 +198,11 @@ jobs: run: | SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }} + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }} + else + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}-no-tts + fi mkdir $dst cp -a build/install/bin $dst/ diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml index 306118621..96b190340 100644 --- a/.github/workflows/windows-x64.yaml +++ b/.github/workflows/windows-x64.yaml @@ -47,6 +47,7 @@ jobs: matrix: os: [windows-latest] shared_lib: [ON, OFF] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -58,7 +59,8 @@ jobs: run: | mkdir build cd build - cmake -A x64 -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. + cmake -A x64 -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. 
+ - name: Build sherpa-onnx for windows shell: bash run: | @@ -93,6 +95,7 @@ jobs: .github/scripts/test-online-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH @@ -155,9 +158,15 @@ jobs: shared_lib=${{ matrix.shared_lib }} if [[ $shared_lib == "ON" ]]; then - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-shared + suffix=shared + else + suffix=static + fi + + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-$suffix else - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-static + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-$suffix-no-tts fi mkdir $dst diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml index be0ddc8dd..c93340ce7 100644 --- a/.github/workflows/windows-x86.yaml +++ b/.github/workflows/windows-x86.yaml @@ -47,6 +47,7 @@ jobs: matrix: os: [windows-latest] shared_lib: [ON, OFF] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -58,7 +59,7 @@ jobs: run: | mkdir build cd build - cmake -A Win32 -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -D CMAKE_INSTALL_PREFIX=./install .. + cmake -A Win32 -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -D CMAKE_INSTALL_PREFIX=./install .. 
- name: Build sherpa-onnx for windows shell: bash @@ -94,6 +95,7 @@ jobs: .github/scripts/test-online-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH @@ -157,10 +159,17 @@ jobs: dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86 shared_lib=${{ matrix.shared_lib }} + if [[ $shared_lib == "ON" ]]; then - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-shared + suffix=shared + else + suffix=static + fi + + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-$suffix else - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-static + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-$suffix-no-tts fi mkdir $dst diff --git a/CMakeLists.txt b/CMakeLists.txt index 495cab28d..94613609e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) +option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON) option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. 
Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-installed onnxruntime if available" ON) @@ -111,10 +112,26 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") +message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") +message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}") +message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}") +message(STATUS "SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY ${SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY}") message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE}") +if(SHERPA_ONNX_ENABLE_TTS) + message(STATUS "TTS is enabled") + add_definitions(-DSHERPA_ONNX_ENABLE_TTS=1) +else() + message(WARNING "TTS is disabled") + add_definitions(-DSHERPA_ONNX_ENABLE_TTS=0) +endif() + if(SHERPA_ONNX_ENABLE_WASM_TTS) + if(NOT SHERPA_ONNX_ENABLE_TTS) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build wasm TTS") + endif() + if(NOT SHERPA_ONNX_ENABLE_WASM) message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for TTS") endif() @@ -238,10 +255,12 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET) include(asio) endif() -include(espeak-ng-for-piper) -set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR}) -message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}") -include(piper-phonemize) +if(SHERPA_ONNX_ENABLE_TTS) + include(espeak-ng-for-piper) + set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR}) + message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}") + include(piper-phonemize) +endif() add_subdirectory(sherpa-onnx) @@ -266,7 +285,12 
@@ if(NOT BUILD_SHARED_LIBS) endif() # See https://people.freedesktop.org/~dbn/pkg-config-guide.html -configure_file(cmake/sherpa-onnx.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY) +if(SHERPA_ONNX_ENABLE_TTS) + configure_file(cmake/sherpa-onnx.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY) +else() + configure_file(cmake/sherpa-onnx-no-tts.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY) +endif() + install( FILES ${PROJECT_BINARY_DIR}/sherpa-onnx.pc diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt index f2f9ea6dc..fd4577f46 100644 --- a/c-api-examples/CMakeLists.txt +++ b/c-api-examples/CMakeLists.txt @@ -4,8 +4,10 @@ include_directories(${CMAKE_SOURCE_DIR}) add_executable(decode-file-c-api decode-file-c-api.c) target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) -add_executable(offline-tts-c-api offline-tts-c-api.c) -target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) +if(SHERPA_ONNX_ENABLE_TTS) + add_executable(offline-tts-c-api offline-tts-c-api.c) + target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) +endif() add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c) target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api) diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index c0c60f33a..689367cb8 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -15,13 +15,16 @@ #include "sherpa-onnx/csrc/keyword-spotter.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-recognizer.h" -#include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/csrc/online-recognizer.h" #include "sherpa-onnx/csrc/spoken-language-identification.h" #include "sherpa-onnx/csrc/voice-activity-detector.h" #include "sherpa-onnx/csrc/wave-reader.h" #include "sherpa-onnx/csrc/wave-writer.h" +#if SHERPA_ONNX_ENABLE_TTS == 1 +#include "sherpa-onnx/csrc/offline-tts.h" +#endif + struct 
SherpaOnnxOnlineRecognizer { std::unique_ptr impl; }; @@ -742,6 +745,7 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { p->impl->Reset(); } +#if SHERPA_ONNX_ENABLE_TTS == 1 struct SherpaOnnxOfflineTts { std::unique_ptr impl; }; @@ -857,6 +861,7 @@ void SherpaOnnxDestroyOfflineTtsGeneratedAudio( delete p; } } +#endif // SHERPA_ONNX_ENABLE_TTS == 1 int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename) { diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt index 6a14aa780..86dbc12c9 100644 --- a/sherpa-onnx/csrc/CMakeLists.txt +++ b/sherpa-onnx/csrc/CMakeLists.txt @@ -43,7 +43,6 @@ set(sources offline-transducer-model-config.cc offline-transducer-model.cc offline-transducer-modified-beam-search-decoder.cc - offline-tts-character-frontend.cc offline-wenet-ctc-model-config.cc offline-wenet-ctc-model.cc offline-whisper-greedy-search-decoder.cc @@ -79,7 +78,6 @@ set(sources packed-sequence.cc pad-sequence.cc parse-options.cc - piper-phonemize-lexicon.cc provider.cc resample.cc session.cc @@ -99,6 +97,7 @@ set(sources vad-model.cc voice-activity-detector.cc wave-reader.cc + wave-writer.cc ) # speaker embedding extractor @@ -110,15 +109,18 @@ list(APPEND sources speaker-embedding-manager.cc ) -list(APPEND sources - lexicon.cc - offline-tts-impl.cc - offline-tts-model-config.cc - offline-tts-vits-model-config.cc - offline-tts-vits-model.cc - offline-tts.cc - wave-writer.cc -) +if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND sources + lexicon.cc + offline-tts-character-frontend.cc + offline-tts-impl.cc + offline-tts-model-config.cc + offline-tts-vits-model-config.cc + offline-tts-vits-model.cc + offline-tts.cc + piper-phonemize-lexicon.cc + ) +endif() if(SHERPA_ONNX_ENABLE_CHECK) list(APPEND sources log.cc) @@ -130,14 +132,21 @@ if(APPLE) ) endif() - if(ANDROID_NDK) target_link_libraries(sherpa-onnx-core android log) endif() -target_link_libraries(sherpa-onnx-core 
kaldi-native-fbank-core) +target_link_libraries(sherpa-onnx-core + kaldi-native-fbank-core + kaldi-decoder-core +) -target_link_libraries(sherpa-onnx-core kaldi-decoder-core) +if(SHERPA_ONNX_ENABLE_GPU) + target_link_libraries(sherpa-onnx-core + onnxruntime_providers_cuda + onnxruntime_providers_shared + ) +endif() if(BUILD_SHARED_LIBS) target_link_libraries(sherpa-onnx-core onnxruntime) @@ -151,15 +160,10 @@ if(NOT BUILD_SHARED_LIBS AND APPLE) target_link_libraries(sherpa-onnx-core "-framework Foundation") endif() -if(SHERPA_ONNX_ENABLE_GPU) - target_link_libraries(sherpa-onnx-core - onnxruntime_providers_cuda - onnxruntime_providers_shared - ) +if(SHERPA_ONNX_ENABLE_TTS) + target_link_libraries(sherpa-onnx-core piper_phonemize) endif() -target_link_libraries(sherpa-onnx-core piper_phonemize) - if(SHERPA_ONNX_ENABLE_CHECK) target_compile_definitions(sherpa-onnx-core PUBLIC SHERPA_ONNX_ENABLE_CHECK=1) @@ -185,17 +189,24 @@ if(SHERPA_ONNX_ENABLE_BINARY) add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc) add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc) add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc) - add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc) add_executable(sherpa-onnx-offline-language-identification sherpa-onnx-offline-language-identification.cc) + if(SHERPA_ONNX_ENABLE_TTS) + add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc) + endif() + set(main_exes sherpa-onnx sherpa-onnx-keyword-spotter sherpa-onnx-offline sherpa-onnx-offline-parallel - sherpa-onnx-offline-tts sherpa-onnx-offline-language-identification ) + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND main_exes + sherpa-onnx-offline-tts + ) + endif() foreach(exe IN LISTS main_exes) target_link_libraries(${exe} sherpa-onnx-core) @@ -235,17 +246,27 @@ endif() if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc) 
add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc) - add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc) add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc) + + if(SHERPA_ONNX_ENABLE_TTS) + add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) + endif() + set(exes sherpa-onnx-alsa sherpa-onnx-keyword-spotter-alsa sherpa-onnx-alsa-offline - sherpa-onnx-offline-tts-play-alsa sherpa-onnx-alsa-offline-speaker-identification ) + + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND exes + sherpa-onnx-offline-tts-play-alsa + ) + endif() + foreach(exe IN LISTS exes) target_link_libraries(${exe} sherpa-onnx-core) endforeach() @@ -279,10 +300,12 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) endif() if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY) - add_executable(sherpa-onnx-offline-tts-play - sherpa-onnx-offline-tts-play.cc - microphone.cc - ) + if(SHERPA_ONNX_ENABLE_TTS) + add_executable(sherpa-onnx-offline-tts-play + sherpa-onnx-offline-tts-play.cc + microphone.cc + ) + endif() add_executable(sherpa-onnx-keyword-spotter-microphone sherpa-onnx-keyword-spotter-microphone.cc @@ -325,10 +348,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY) sherpa-onnx-keyword-spotter-microphone sherpa-onnx-microphone-offline sherpa-onnx-microphone-offline-speaker-identification - sherpa-onnx-offline-tts-play sherpa-onnx-vad-microphone sherpa-onnx-vad-microphone-offline-asr ) + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND exes + sherpa-onnx-offline-tts-play + ) + endif() + foreach(exe IN LISTS exes) target_link_libraries(${exe} ${PA_LIB} sherpa-onnx-core) endforeach() @@ -369,10 +397,8 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY) 
target_link_libraries(sherpa-onnx-online-websocket-client sherpa-onnx-core) if(NOT WIN32) - target_link_libraries(sherpa-onnx-online-websocket-server -pthread) target_compile_options(sherpa-onnx-online-websocket-server PRIVATE -Wno-deprecated-declarations) - target_link_libraries(sherpa-onnx-online-websocket-client -pthread) target_compile_options(sherpa-onnx-online-websocket-client PRIVATE -Wno-deprecated-declarations) endif() @@ -384,7 +410,6 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY) target_link_libraries(sherpa-onnx-offline-websocket-server sherpa-onnx-core) if(NOT WIN32) - target_link_libraries(sherpa-onnx-offline-websocket-server -pthread) target_compile_options(sherpa-onnx-offline-websocket-server PRIVATE -Wno-deprecated-declarations) endif() @@ -422,13 +447,17 @@ if(SHERPA_ONNX_ENABLE_TESTS) context-graph-test.cc packed-sequence-test.cc pad-sequence-test.cc - piper-phonemize-test.cc slice-test.cc stack-test.cc transpose-test.cc unbind-test.cc utfcpp-test.cc ) + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND sherpa_onnx_test_srcs + piper-phonemize-test.cc + ) + endif() list(APPEND sherpa_onnx_test_srcs speaker-embedding-manager-test.cc diff --git a/sherpa-onnx/jni/jni.cc b/sherpa-onnx/jni/jni.cc index 1dbf96a7d..da0b50e45 100644 --- a/sherpa-onnx/jni/jni.cc +++ b/sherpa-onnx/jni/jni.cc @@ -24,7 +24,6 @@ #include "sherpa-onnx/csrc/keyword-spotter.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-recognizer.h" -#include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/csrc/online-recognizer.h" #include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" @@ -33,6 +32,10 @@ #include "sherpa-onnx/csrc/wave-reader.h" #include "sherpa-onnx/csrc/wave-writer.h" +#if SHERPA_ONNX_ENABLE_TTS == 1 +#include "sherpa-onnx/csrc/offline-tts.h" +#endif + #define SHERPA_ONNX_EXTERN_C extern "C" namespace sherpa_onnx { @@ -629,8 +632,8 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv 
*env, jobject config) { env->ReleaseStringUTFChars(s, p); fid = env->GetFieldID(whisper_config_cls, "tailPaddings", "I"); - ans.model_config.whisper.tail_paddings = env->GetIntField(whisper_config, - fid); + ans.model_config.whisper.tail_paddings = + env->GetIntField(whisper_config, fid); return ans; } @@ -782,6 +785,7 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { return ans; } +#if SHERPA_ONNX_ENABLE_TTS == 1 class SherpaOnnxOfflineTts { public: #if __ANDROID_API__ >= 9 @@ -878,6 +882,7 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { return ans; } +#endif } // namespace sherpa_onnx @@ -1209,6 +1214,15 @@ Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames( return obj_arr; } +// see +// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables +static jobject NewInteger(JNIEnv *env, int32_t value) { + jclass cls = env->FindClass("java/lang/Integer"); + jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V"); + return env->NewObject(cls, constructor, value); +} + +#if SHERPA_ONNX_ENABLE_TTS == 1 SHERPA_ONNX_EXTERN_C JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { @@ -1265,14 +1279,6 @@ JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers( ->NumSpeakers(); } -// see -// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables -static jobject NewInteger(JNIEnv *env, int32_t value) { - jclass cls = env->FindClass("java/lang/Integer"); - jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V"); - return env->NewObject(cls, constructor, value); -} - SHERPA_ONNX_EXTERN_C JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/, @@ -1336,6 +1342,7 @@ Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl( return obj_arr; } +#endif SHERPA_ONNX_EXTERN_C JNIEXPORT jboolean 
JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( diff --git a/sherpa-onnx/python/csrc/CMakeLists.txt b/sherpa-onnx/python/csrc/CMakeLists.txt index ff81d5e4e..9e5af779d 100644 --- a/sherpa-onnx/python/csrc/CMakeLists.txt +++ b/sherpa-onnx/python/csrc/CMakeLists.txt @@ -15,9 +15,6 @@ set(srcs offline-stream.cc offline-tdnn-model-config.cc offline-transducer-model-config.cc - offline-tts-model-config.cc - offline-tts-vits-model-config.cc - offline-tts.cc offline-wenet-ctc-model-config.cc offline-whisper-model-config.cc offline-zipformer-ctc-model-config.cc @@ -44,6 +41,14 @@ else() list(APPEND srcs faked-alsa.cc) endif() +if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND srcs + offline-tts-model-config.cc + offline-tts-vits-model-config.cc + offline-tts.cc + ) +endif() + pybind11_add_module(_sherpa_onnx ${srcs}) if(APPLE) diff --git a/sherpa-onnx/python/csrc/sherpa-onnx.cc b/sherpa-onnx/python/csrc/sherpa-onnx.cc index b30ed16da..62c64ec72 100644 --- a/sherpa-onnx/python/csrc/sherpa-onnx.cc +++ b/sherpa-onnx/python/csrc/sherpa-onnx.cc @@ -15,7 +15,6 @@ #include "sherpa-onnx/python/csrc/offline-model-config.h" #include "sherpa-onnx/python/csrc/offline-recognizer.h" #include "sherpa-onnx/python/csrc/offline-stream.h" -#include "sherpa-onnx/python/csrc/offline-tts.h" #include "sherpa-onnx/python/csrc/online-lm-config.h" #include "sherpa-onnx/python/csrc/online-model-config.h" #include "sherpa-onnx/python/csrc/online-recognizer.h" @@ -27,6 +26,10 @@ #include "sherpa-onnx/python/csrc/vad-model.h" #include "sherpa-onnx/python/csrc/voice-activity-detector.h" +#if SHERPA_ONNX_ENABLE_TTS == 1 +#include "sherpa-onnx/python/csrc/offline-tts.h" +#endif + namespace sherpa_onnx { PYBIND11_MODULE(_sherpa_onnx, m) { @@ -53,7 +56,10 @@ PYBIND11_MODULE(_sherpa_onnx, m) { PybindCircularBuffer(&m); PybindVoiceActivityDetector(&m); +#if SHERPA_ONNX_ENABLE_TTS == 1 PybindOfflineTts(&m); +#endif + PybindSpeakerEmbeddingExtractor(&m); PybindSpeakerEmbeddingManager(&m); 
PybindSpokenLanguageIdentification(&m);