From 4e040c596ecb56d79b12be56428cd2d1f52bbf48 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 26 Mar 2024 17:21:35 +0800 Subject: [PATCH] Support including TTS conditionally. (#699) --- .github/workflows/build-xcframework.yaml | 49 ++++++- .github/workflows/linux.yaml | 16 ++- .github/workflows/macos.yaml | 14 +- .github/workflows/windows-x64.yaml | 17 ++- .github/workflows/windows-x86.yaml | 17 ++- CMakeLists.txt | 34 ++++- build-ios-no-tts.sh | 169 +++++++++++++++++++++++ c-api-examples/CMakeLists.txt | 6 +- cmake/sherpa-onnx-no-tts.pc.in | 16 +++ sherpa-onnx/c-api/c-api.cc | 7 +- sherpa-onnx/csrc/CMakeLists.txt | 97 ++++++++----- sherpa-onnx/jni/jni.cc | 29 ++-- sherpa-onnx/python/csrc/CMakeLists.txt | 11 +- sherpa-onnx/python/csrc/sherpa-onnx.cc | 8 +- 14 files changed, 413 insertions(+), 77 deletions(-) create mode 100755 build-ios-no-tts.sh create mode 100644 cmake/sherpa-onnx-no-tts.pc.in diff --git a/.github/workflows/build-xcframework.yaml b/.github/workflows/build-xcframework.yaml index 85ba4bcd6..d4e8c8967 100644 --- a/.github/workflows/build-xcframework.yaml +++ b/.github/workflows/build-xcframework.yaml @@ -32,28 +32,45 @@ concurrency: jobs: build_xcframework: - name: Build xcframework on ${{ matrix.os }} + name: tts-${{ matrix.with_tts }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [macos-latest] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 - name: Build iOS + if: matrix.with_tts == 'ON' shell: bash run: | ./build-ios.sh + - name: Build iOS (No tts) + if: matrix.with_tts == 'OFF' + shell: bash + run: | + ./build-ios-no-tts.sh + - name: Display artifacts + if: matrix.with_tts == 'ON' shell: bash run: | brew install tree tree -L 2 ./build-ios + - name: Display artifacts + if: matrix.with_tts == 'OFF' + shell: bash + run: | + brew install tree + tree -L 2 ./build-ios-no-tts + - name: Package artifacts + if: matrix.with_tts == 'ON' shell: bash run: | SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" 
./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) @@ -71,11 +88,37 @@ jobs: ls -lh + - name: Package artifacts + if: matrix.with_tts == 'OFF' + shell: bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION=$SHERPA_ONNX_VERSION" >> "$GITHUB_ENV" + + rm -rf build-ios-no-tts/build + rm -rf build-ios-no-tts/install + rm -rf build-ios-no-tts/ios-onnxruntime/.git + + tree build-ios-no-tts + + filename=sherpa-onnx-${SHERPA_ONNX_VERSION}-ios-no-tts.tar.bz2 + + tar cjvf $filename ./build-ios-no-tts + + ls -lh + - uses: actions/upload-artifact@v4 + if: matrix.with_tts == 'ON' with: name: sherpa-onnx-ios-libs path: ./build-ios + - uses: actions/upload-artifact@v4 + if: matrix.with_tts == 'OFF' + with: + name: sherpa-onnx-ios-libs-no-tts + path: ./build-ios-no-tts + # https://huggingface.co/docs/hub/spaces-github-actions - name: Publish to huggingface if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') @@ -96,7 +139,7 @@ jobs: cd huggingface git lfs pull - cp -v ../sherpa-onnx-*-ios.tar.bz2 ./ + cp -v ../sherpa-onnx-*.tar.bz2 ./ git status git lfs track "*.bz2" @@ -113,4 +156,4 @@ jobs: with: file_glob: true overwrite: true - file: sherpa-onnx-*-ios.tar.bz2 + file: sherpa-onnx-*.tar.bz2 diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index b1cf6ed91..7df1761a1 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -45,7 +45,7 @@ concurrency: jobs: linux: - name: ${{ matrix.build_type }} ${{ matrix.shared_lib }} + name: ${{ matrix.build_type }} shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -53,6 +53,7 @@ jobs: os: [ubuntu-latest] build_type: [Release, Debug] shared_lib: [ON, OFF] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -93,7 +94,7 @@ 
jobs: mkdir build cd build - cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. + cmake -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. make -j2 make install @@ -120,7 +121,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }} + name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: build/bin/* - name: Test spoken language identification (C API) @@ -175,6 +176,7 @@ jobs: .github/scripts/test-offline-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin:$PATH @@ -226,7 +228,11 @@ jobs: suffix=static fi - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix + else + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-$suffix-no-tts + fi mkdir $dst cp -a build/install/bin $dst/ @@ -278,6 +284,6 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }} + name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: tts diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index a80456279..4152cf6c4 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -45,13 +45,14 @@ concurrency: jobs: macos: runs-on: ${{ matrix.os }} - name: ${{ matrix.build_type }} ${{ matrix.lib_type }} + name: ${{ matrix.build_type }} ${{ matrix.lib_type }} tts-${{ matrix.with_tts }} strategy: fail-fast: false matrix: os: [macos-latest] build_type: [Release, Debug] lib_type: [static, shared] + with_tts: [ON, OFF] steps: - uses:
actions/checkout@v4 @@ -61,7 +62,7 @@ jobs: - name: ccache uses: hendrikmuhs/ccache-action@v1.2 with: - key: ${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.lib_type }} + key: ${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.lib_type }}-tts-${{ matrix.with_tts }} - name: Configure CMake shell: bash @@ -79,7 +80,7 @@ jobs: BUILD_SHARED_LIBS=ON fi - cmake -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_INSTALL_PREFIX=./install .. + cmake -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D BUILD_SHARED_LIBS=$BUILD_SHARED_LIBS -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_OSX_ARCHITECTURES='arm64;x86_64' -DCMAKE_INSTALL_PREFIX=./install .. - name: Build sherpa-onnx for macos shell: bash @@ -135,6 +136,7 @@ jobs: .github/scripts/test-online-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin:$PATH @@ -196,7 +198,11 @@ jobs: run: | SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }} + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }} + else + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-osx-universal2-${{ matrix.lib_type }}-no-tts + fi mkdir $dst cp -a build/install/bin $dst/ diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml index 306118621..bd90119c2 100644 --- a/.github/workflows/windows-x64.yaml +++ b/.github/workflows/windows-x64.yaml @@ -40,13 +40,14 @@ concurrency: jobs: windows_x64: - name: ${{ matrix.shared_lib }} + name: shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [windows-latest] shared_lib: [ON, OFF] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -58,7 +59,8 @@ jobs: run: | mkdir build cd
build - cmake -A x64 -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. + cmake -A x64 -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install .. + - name: Build sherpa-onnx for windows shell: bash run: | @@ -93,6 +95,7 @@ jobs: .github/scripts/test-online-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH @@ -155,9 +158,15 @@ jobs: shared_lib=${{ matrix.shared_lib }} if [[ $shared_lib == "ON" ]]; then - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-shared + suffix=shared + else + suffix=static + fi + + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-$suffix else - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-static + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x64-$suffix-no-tts fi mkdir $dst diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml index be0ddc8dd..69ad7cd97 100644 --- a/.github/workflows/windows-x86.yaml +++ b/.github/workflows/windows-x86.yaml @@ -40,13 +40,14 @@ concurrency: jobs: windows_x86: - name: Windows x86 + name: shared-${{ matrix.shared_lib }} tts-${{ matrix.with_tts }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [windows-latest] shared_lib: [ON, OFF] + with_tts: [ON, OFF] steps: - uses: actions/checkout@v4 @@ -58,7 +59,7 @@ jobs: run: | mkdir build cd build - cmake -A Win32 -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -D CMAKE_INSTALL_PREFIX=./install .. + cmake -A Win32 -DSHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -D CMAKE_INSTALL_PREFIX=./install ..
- name: Build sherpa-onnx for windows shell: bash @@ -94,6 +95,7 @@ jobs: .github/scripts/test-online-ctc.sh - name: Test offline TTS + if: matrix.with_tts == 'ON' shell: bash run: | export PATH=$PWD/build/bin/Release:$PATH @@ -157,10 +159,17 @@ jobs: dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86 shared_lib=${{ matrix.shared_lib }} + if [[ $shared_lib == "ON" ]]; then - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-shared + suffix=shared + else + suffix=static + fi + + if [[ ${{ matrix.with_tts }} == "ON" ]]; then + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-$suffix else - dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-static + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-win-x86-$suffix-no-tts fi mkdir $dst diff --git a/CMakeLists.txt b/CMakeLists.txt index 495cab28d..94613609e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) +option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON) option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically.
Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-installed onnxruntime if available" ON) @@ -111,10 +112,26 @@ message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") +message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") +message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}") +message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}") +message(STATUS "SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY ${SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY}") message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE}") +if(SHERPA_ONNX_ENABLE_TTS) + message(STATUS "TTS is enabled") + add_definitions(-DSHERPA_ONNX_ENABLE_TTS=1) +else() + message(WARNING "TTS is disabled") + add_definitions(-DSHERPA_ONNX_ENABLE_TTS=0) +endif() + if(SHERPA_ONNX_ENABLE_WASM_TTS) + if(NOT SHERPA_ONNX_ENABLE_TTS) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_TTS to ON if you want to build wasm TTS") + endif() + if(NOT SHERPA_ONNX_ENABLE_WASM) message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for TTS") endif() @@ -238,10 +255,12 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET) include(asio) endif() -include(espeak-ng-for-piper) -set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR}) -message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}") -include(piper-phonemize) +if(SHERPA_ONNX_ENABLE_TTS) + include(espeak-ng-for-piper) + set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR}) + message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}") + include(piper-phonemize) +endif() add_subdirectory(sherpa-onnx) @@ -266,7 +285,12 
@@ if(NOT BUILD_SHARED_LIBS) endif() # See https://people.freedesktop.org/~dbn/pkg-config-guide.html -configure_file(cmake/sherpa-onnx.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY) +if(SHERPA_ONNX_ENABLE_TTS) + configure_file(cmake/sherpa-onnx.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY) +else() + configure_file(cmake/sherpa-onnx-no-tts.pc.in ${PROJECT_BINARY_DIR}/sherpa-onnx.pc @ONLY) +endif() + install( FILES ${PROJECT_BINARY_DIR}/sherpa-onnx.pc diff --git a/build-ios-no-tts.sh b/build-ios-no-tts.sh new file mode 100755 index 000000000..f9cd626ed --- /dev/null +++ b/build-ios-no-tts.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash + +set -e + +dir=build-ios-no-tts +mkdir -p $dir +cd $dir +onnxruntime_version=1.17.1 +onnxruntime_dir=ios-onnxruntime/$onnxruntime_version + +if [ ! -f $onnxruntime_dir/onnxruntime.xcframework/ios-arm64/onnxruntime.a ]; then + mkdir -p $onnxruntime_dir + pushd $onnxruntime_dir + wget https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime.xcframework-${onnxruntime_version}.tar.bz2 + tar xvf onnxruntime.xcframework-${onnxruntime_version}.tar.bz2 + rm onnxruntime.xcframework-${onnxruntime_version}.tar.bz2 + cd .. + ln -sf $onnxruntime_version/onnxruntime.xcframework . + popd +fi + +# First, for simulator +echo "Building for simulator (x86_64)" + +export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/ios-onnxruntime/onnxruntime.xcframework/ios-arm64_x86_64-simulator +export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$PWD/ios-onnxruntime/onnxruntime.xcframework/Headers + +echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" +echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" + +# Note: We use -DENABLE_ARC=1 here to fix the linking error: +# +# The symbol _NSLog is not defined +# + +cmake \ + -DSHERPA_ONNX_ENABLE_TTS=OFF \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ + -S .. 
\ + -DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \ + -DPLATFORM=SIMULATOR64 \ + -DENABLE_BITCODE=0 \ + -DENABLE_ARC=1 \ + -DENABLE_VISIBILITY=0 \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DDEPLOYMENT_TARGET=13.0 \ + -B build/simulator_x86_64 + +cmake --build build/simulator_x86_64 -j 4 --verbose + +echo "Building for simulator (arm64)" + +cmake \ + -DSHERPA_ONNX_ENABLE_TTS=OFF \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ + -S .. \ + -DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \ + -DPLATFORM=SIMULATORARM64 \ + -DENABLE_BITCODE=0 \ + -DENABLE_ARC=1 \ + -DENABLE_VISIBILITY=0 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DDEPLOYMENT_TARGET=13.0 \ + -B build/simulator_arm64 + +cmake --build build/simulator_arm64 -j 4 --verbose + +echo "Building for arm64" + +export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/ios-onnxruntime/onnxruntime.xcframework/ios-arm64 + + +cmake \ + -DSHERPA_ONNX_ENABLE_TTS=OFF \ + -DBUILD_PIPER_PHONMIZE_EXE=OFF \ + -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ + -DBUILD_ESPEAK_NG_EXE=OFF \ + -DBUILD_ESPEAK_NG_TESTS=OFF \ + -S .. 
\ + -DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \ + -DPLATFORM=OS64 \ + -DENABLE_BITCODE=0 \ + -DENABLE_ARC=1 \ + -DENABLE_VISIBILITY=0 \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DDEPLOYMENT_TARGET=13.0 \ + -B build/os64 + +cmake --build build/os64 -j 4 +# Generate headers for sherpa-onnx.xcframework +cmake --build build/os64 --target install + +echo "Generate xcframework" + +mkdir -p "build/simulator/lib" +for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \ + libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a; do + lipo -create build/simulator_arm64/lib/${f} \ + build/simulator_x86_64/lib/${f} \ + -output build/simulator/lib/${f} +done + +# Merge archive first, because the following xcodebuild create xcframework +# cannot accept multi archive with the same architecture. 
+libtool -static -o build/simulator/sherpa-onnx.a \ + build/simulator/lib/libkaldi-native-fbank-core.a \ + build/simulator/lib/libsherpa-onnx-c-api.a \ + build/simulator/lib/libsherpa-onnx-core.a \ + build/simulator/lib/libsherpa-onnx-fst.a \ + build/simulator/lib/libsherpa-onnx-kaldifst-core.a \ + build/simulator/lib/libkaldi-decoder-core.a + +libtool -static -o build/os64/sherpa-onnx.a \ + build/os64/lib/libkaldi-native-fbank-core.a \ + build/os64/lib/libsherpa-onnx-c-api.a \ + build/os64/lib/libsherpa-onnx-core.a \ + build/os64/lib/libsherpa-onnx-fst.a \ + build/os64/lib/libsherpa-onnx-kaldifst-core.a \ + build/os64/lib/libkaldi-decoder-core.a + +rm -rf sherpa-onnx.xcframework + +xcodebuild -create-xcframework \ + -library "build/os64/sherpa-onnx.a" \ + -library "build/simulator/sherpa-onnx.a" \ + -output sherpa-onnx.xcframework + +# Copy Headers +mkdir -p sherpa-onnx.xcframework/Headers +cp -av install/include/* sherpa-onnx.xcframework/Headers + +pushd sherpa-onnx.xcframework/ios-arm64_x86_64-simulator +ln -s sherpa-onnx.a libsherpa-onnx.a +popd + +pushd sherpa-onnx.xcframework/ios-arm64 +ln -s sherpa-onnx.a libsherpa-onnx.a diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt index f2f9ea6dc..fd4577f46 100644 --- a/c-api-examples/CMakeLists.txt +++ b/c-api-examples/CMakeLists.txt @@ -4,8 +4,10 @@ include_directories(${CMAKE_SOURCE_DIR}) add_executable(decode-file-c-api decode-file-c-api.c) target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) -add_executable(offline-tts-c-api offline-tts-c-api.c) -target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) +if(SHERPA_ONNX_ENABLE_TTS) + add_executable(offline-tts-c-api offline-tts-c-api.c) + target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) +endif() add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c) target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api) diff --git a/cmake/sherpa-onnx-no-tts.pc.in 
b/cmake/sherpa-onnx-no-tts.pc.in new file mode 100644 index 000000000..df193043c --- /dev/null +++ b/cmake/sherpa-onnx-no-tts.pc.in @@ -0,0 +1,16 @@ +prefix="@CMAKE_INSTALL_PREFIX@" +exec_prefix="${prefix}" +includedir="${prefix}/include" +libdir="${exec_prefix}/lib" + +Name: sherpa-onnx +Description: pkg-config for sherpa-onnx without TTS support +URL: https://github.com/k2-fsa/sherpa-onnx + +Version: @SHERPA_ONNX_VERSION@ +Cflags: -I"${includedir}" + +# Note: -lcargs is required only for the following file +# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c +# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c +Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index c0c60f33a..689367cb8 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -15,13 +15,16 @@ #include "sherpa-onnx/csrc/keyword-spotter.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-recognizer.h" -#include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/csrc/online-recognizer.h" #include "sherpa-onnx/csrc/spoken-language-identification.h" #include "sherpa-onnx/csrc/voice-activity-detector.h" #include "sherpa-onnx/csrc/wave-reader.h" #include "sherpa-onnx/csrc/wave-writer.h" +#if SHERPA_ONNX_ENABLE_TTS == 1 +#include "sherpa-onnx/csrc/offline-tts.h" +#endif + struct SherpaOnnxOnlineRecognizer { std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl; }; @@ -742,6 +745,7 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { p->impl->Reset(); } +#if SHERPA_ONNX_ENABLE_TTS == 1 struct SherpaOnnxOfflineTts { std::unique_ptr<sherpa_onnx::OfflineTts> impl; }; @@ -857,6 +861,7 @@ void SherpaOnnxDestroyOfflineTtsGeneratedAudio( delete p; } } +#endif //
SHERPA_ONNX_ENABLE_TTS == 1 int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, int32_t sample_rate, const char *filename) { diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt index 6a14aa780..86dbc12c9 100644 --- a/sherpa-onnx/csrc/CMakeLists.txt +++ b/sherpa-onnx/csrc/CMakeLists.txt @@ -43,7 +43,6 @@ set(sources offline-transducer-model-config.cc offline-transducer-model.cc offline-transducer-modified-beam-search-decoder.cc - offline-tts-character-frontend.cc offline-wenet-ctc-model-config.cc offline-wenet-ctc-model.cc offline-whisper-greedy-search-decoder.cc @@ -79,7 +78,6 @@ set(sources packed-sequence.cc pad-sequence.cc parse-options.cc - piper-phonemize-lexicon.cc provider.cc resample.cc session.cc @@ -99,6 +97,7 @@ set(sources vad-model.cc voice-activity-detector.cc wave-reader.cc + wave-writer.cc ) # speaker embedding extractor @@ -110,15 +109,18 @@ list(APPEND sources speaker-embedding-manager.cc ) -list(APPEND sources - lexicon.cc - offline-tts-impl.cc - offline-tts-model-config.cc - offline-tts-vits-model-config.cc - offline-tts-vits-model.cc - offline-tts.cc - wave-writer.cc -) +if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND sources + lexicon.cc + offline-tts-character-frontend.cc + offline-tts-impl.cc + offline-tts-model-config.cc + offline-tts-vits-model-config.cc + offline-tts-vits-model.cc + offline-tts.cc + piper-phonemize-lexicon.cc + ) +endif() if(SHERPA_ONNX_ENABLE_CHECK) list(APPEND sources log.cc) @@ -130,14 +132,21 @@ if(APPLE) ) endif() - if(ANDROID_NDK) target_link_libraries(sherpa-onnx-core android log) endif() -target_link_libraries(sherpa-onnx-core kaldi-native-fbank-core) +target_link_libraries(sherpa-onnx-core + kaldi-native-fbank-core + kaldi-decoder-core +) -target_link_libraries(sherpa-onnx-core kaldi-decoder-core) +if(SHERPA_ONNX_ENABLE_GPU) + target_link_libraries(sherpa-onnx-core + onnxruntime_providers_cuda + onnxruntime_providers_shared + ) +endif() if(BUILD_SHARED_LIBS) 
target_link_libraries(sherpa-onnx-core onnxruntime) @@ -151,15 +160,10 @@ if(NOT BUILD_SHARED_LIBS AND APPLE) target_link_libraries(sherpa-onnx-core "-framework Foundation") endif() -if(SHERPA_ONNX_ENABLE_GPU) - target_link_libraries(sherpa-onnx-core - onnxruntime_providers_cuda - onnxruntime_providers_shared - ) +if(SHERPA_ONNX_ENABLE_TTS) + target_link_libraries(sherpa-onnx-core piper_phonemize) endif() -target_link_libraries(sherpa-onnx-core piper_phonemize) - if(SHERPA_ONNX_ENABLE_CHECK) target_compile_definitions(sherpa-onnx-core PUBLIC SHERPA_ONNX_ENABLE_CHECK=1) @@ -185,17 +189,24 @@ if(SHERPA_ONNX_ENABLE_BINARY) add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc) add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc) add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc) - add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc) add_executable(sherpa-onnx-offline-language-identification sherpa-onnx-offline-language-identification.cc) + if(SHERPA_ONNX_ENABLE_TTS) + add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc) + endif() + set(main_exes sherpa-onnx sherpa-onnx-keyword-spotter sherpa-onnx-offline sherpa-onnx-offline-parallel - sherpa-onnx-offline-tts sherpa-onnx-offline-language-identification ) + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND main_exes + sherpa-onnx-offline-tts + ) + endif() foreach(exe IN LISTS main_exes) target_link_libraries(${exe} sherpa-onnx-core) @@ -235,17 +246,27 @@ endif() if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc) add_executable(sherpa-onnx-keyword-spotter-alsa sherpa-onnx-keyword-spotter-alsa.cc alsa.cc) - add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc) add_executable(sherpa-onnx-alsa-offline-speaker-identification 
sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc) + + if(SHERPA_ONNX_ENABLE_TTS) + add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) + endif() + set(exes sherpa-onnx-alsa sherpa-onnx-keyword-spotter-alsa sherpa-onnx-alsa-offline - sherpa-onnx-offline-tts-play-alsa sherpa-onnx-alsa-offline-speaker-identification ) + + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND exes + sherpa-onnx-offline-tts-play-alsa + ) + endif() + foreach(exe IN LISTS exes) target_link_libraries(${exe} sherpa-onnx-core) endforeach() @@ -279,10 +300,12 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) endif() if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY) - add_executable(sherpa-onnx-offline-tts-play - sherpa-onnx-offline-tts-play.cc - microphone.cc - ) + if(SHERPA_ONNX_ENABLE_TTS) + add_executable(sherpa-onnx-offline-tts-play + sherpa-onnx-offline-tts-play.cc + microphone.cc + ) + endif() add_executable(sherpa-onnx-keyword-spotter-microphone sherpa-onnx-keyword-spotter-microphone.cc @@ -325,10 +348,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY) sherpa-onnx-keyword-spotter-microphone sherpa-onnx-microphone-offline sherpa-onnx-microphone-offline-speaker-identification - sherpa-onnx-offline-tts-play sherpa-onnx-vad-microphone sherpa-onnx-vad-microphone-offline-asr ) + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND exes + sherpa-onnx-offline-tts-play + ) + endif() + foreach(exe IN LISTS exes) target_link_libraries(${exe} ${PA_LIB} sherpa-onnx-core) endforeach() @@ -369,10 +397,8 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY) target_link_libraries(sherpa-onnx-online-websocket-client sherpa-onnx-core) if(NOT WIN32) - target_link_libraries(sherpa-onnx-online-websocket-server -pthread) target_compile_options(sherpa-onnx-online-websocket-server PRIVATE -Wno-deprecated-declarations) - target_link_libraries(sherpa-onnx-online-websocket-client -pthread) 
target_compile_options(sherpa-onnx-online-websocket-client PRIVATE -Wno-deprecated-declarations) endif() @@ -384,7 +410,6 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY) target_link_libraries(sherpa-onnx-offline-websocket-server sherpa-onnx-core) if(NOT WIN32) - target_link_libraries(sherpa-onnx-offline-websocket-server -pthread) target_compile_options(sherpa-onnx-offline-websocket-server PRIVATE -Wno-deprecated-declarations) endif() @@ -422,13 +447,17 @@ if(SHERPA_ONNX_ENABLE_TESTS) context-graph-test.cc packed-sequence-test.cc pad-sequence-test.cc - piper-phonemize-test.cc slice-test.cc stack-test.cc transpose-test.cc unbind-test.cc utfcpp-test.cc ) + if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND sherpa_onnx_test_srcs + piper-phonemize-test.cc + ) + endif() list(APPEND sherpa_onnx_test_srcs speaker-embedding-manager-test.cc diff --git a/sherpa-onnx/jni/jni.cc b/sherpa-onnx/jni/jni.cc index 1dbf96a7d..da0b50e45 100644 --- a/sherpa-onnx/jni/jni.cc +++ b/sherpa-onnx/jni/jni.cc @@ -24,7 +24,6 @@ #include "sherpa-onnx/csrc/keyword-spotter.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-recognizer.h" -#include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/csrc/online-recognizer.h" #include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" @@ -33,6 +32,10 @@ #include "sherpa-onnx/csrc/wave-reader.h" #include "sherpa-onnx/csrc/wave-writer.h" +#if SHERPA_ONNX_ENABLE_TTS == 1 +#include "sherpa-onnx/csrc/offline-tts.h" +#endif + #define SHERPA_ONNX_EXTERN_C extern "C" namespace sherpa_onnx { @@ -629,8 +632,8 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { env->ReleaseStringUTFChars(s, p); fid = env->GetFieldID(whisper_config_cls, "tailPaddings", "I"); - ans.model_config.whisper.tail_paddings = env->GetIntField(whisper_config, - fid); + ans.model_config.whisper.tail_paddings = + env->GetIntField(whisper_config, fid); return ans; } @@ -782,6 +785,7 @@ 
static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { return ans; } +#if SHERPA_ONNX_ENABLE_TTS == 1 class SherpaOnnxOfflineTts { public: #if __ANDROID_API__ >= 9 @@ -878,6 +882,7 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { return ans; } +#endif } // namespace sherpa_onnx @@ -1209,6 +1214,15 @@ Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames( return obj_arr; } +// see +// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables +static jobject NewInteger(JNIEnv *env, int32_t value) { + jclass cls = env->FindClass("java/lang/Integer"); + jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V"); + return env->NewObject(cls, constructor, value); +} + +#if SHERPA_ONNX_ENABLE_TTS == 1 SHERPA_ONNX_EXTERN_C JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { @@ -1265,14 +1279,6 @@ JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers( ->NumSpeakers(); } -// see -// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables -static jobject NewInteger(JNIEnv *env, int32_t value) { - jclass cls = env->FindClass("java/lang/Integer"); - jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V"); - return env->NewObject(cls, constructor, value); -} - SHERPA_ONNX_EXTERN_C JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/, @@ -1336,6 +1342,7 @@ Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl( return obj_arr; } +#endif SHERPA_ONNX_EXTERN_C JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( diff --git a/sherpa-onnx/python/csrc/CMakeLists.txt b/sherpa-onnx/python/csrc/CMakeLists.txt index ff81d5e4e..9e5af779d 100644 --- a/sherpa-onnx/python/csrc/CMakeLists.txt +++ b/sherpa-onnx/python/csrc/CMakeLists.txt @@ -15,9 +15,6 @@ set(srcs offline-stream.cc
offline-tdnn-model-config.cc offline-transducer-model-config.cc - offline-tts-model-config.cc - offline-tts-vits-model-config.cc - offline-tts.cc offline-wenet-ctc-model-config.cc offline-whisper-model-config.cc offline-zipformer-ctc-model-config.cc @@ -44,6 +41,14 @@ else() list(APPEND srcs faked-alsa.cc) endif() +if(SHERPA_ONNX_ENABLE_TTS) + list(APPEND srcs + offline-tts-model-config.cc + offline-tts-vits-model-config.cc + offline-tts.cc + ) +endif() + pybind11_add_module(_sherpa_onnx ${srcs}) if(APPLE) diff --git a/sherpa-onnx/python/csrc/sherpa-onnx.cc b/sherpa-onnx/python/csrc/sherpa-onnx.cc index b30ed16da..62c64ec72 100644 --- a/sherpa-onnx/python/csrc/sherpa-onnx.cc +++ b/sherpa-onnx/python/csrc/sherpa-onnx.cc @@ -15,7 +15,6 @@ #include "sherpa-onnx/python/csrc/offline-model-config.h" #include "sherpa-onnx/python/csrc/offline-recognizer.h" #include "sherpa-onnx/python/csrc/offline-stream.h" -#include "sherpa-onnx/python/csrc/offline-tts.h" #include "sherpa-onnx/python/csrc/online-lm-config.h" #include "sherpa-onnx/python/csrc/online-model-config.h" #include "sherpa-onnx/python/csrc/online-recognizer.h" @@ -27,6 +26,10 @@ #include "sherpa-onnx/python/csrc/vad-model.h" #include "sherpa-onnx/python/csrc/voice-activity-detector.h" +#if SHERPA_ONNX_ENABLE_TTS == 1 +#include "sherpa-onnx/python/csrc/offline-tts.h" +#endif + namespace sherpa_onnx { PYBIND11_MODULE(_sherpa_onnx, m) { @@ -53,7 +56,10 @@ PYBIND11_MODULE(_sherpa_onnx, m) { PybindCircularBuffer(&m); PybindVoiceActivityDetector(&m); +#if SHERPA_ONNX_ENABLE_TTS == 1 PybindOfflineTts(&m); +#endif + PybindSpeakerEmbeddingExtractor(&m); PybindSpeakerEmbeddingManager(&m); PybindSpokenLanguageIdentification(&m);