diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh new file mode 100755 index 000000000..9900a9db1 --- /dev/null +++ b/.github/scripts/test-nodejs-npm.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +set -ex + +echo "dir: $d" +cd $d +npm install +git status +ls -lh +ls -lh node_modules + +# offline asr + +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 +tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 +rm sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 +node ./test-offline-nemo-ctc.js +rm -rf sherpa-onnx-nemo-ctc-en-conformer-small + +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +node ./test-offline-paraformer.js +rm -rf sherpa-onnx-paraformer-zh-2023-03-28 + +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +rm sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +node ./test-offline-transducer.js +rm -rf sherpa-onnx-zipformer-en-2023-06-26 + +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 +rm sherpa-onnx-whisper-tiny.en.tar.bz2 +node ./test-offline-whisper.js +rm -rf sherpa-onnx-whisper-tiny.en + +# online asr +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +node ./test-online-paraformer.js +rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en + +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +node ./test-online-transducer.js +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 + +# offline tts +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2 +tar xvf vits-vctk.tar.bz2 +rm vits-vctk.tar.bz2 +node ./test-offline-tts-en.js +rm -rf vits-vctk + +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 +tar xvf vits-zh-aishell3.tar.bz2 +rm vits-zh-aishell3.tar.bz2 +node ./test-offline-tts-zh.js +rm -rf vits-zh-aishell3 diff --git a/.github/workflows/.gitignore b/.github/workflows/.gitignore new file mode 100644 index 000000000..545d9ab4f --- /dev/null +++ b/.github/workflows/.gitignore @@ -0,0 +1 @@ +!*.yaml diff --git a/.github/workflows/aarch64-linux-gnu-shared.yaml b/.github/workflows/aarch64-linux-gnu-shared.yaml index 5835345db..ddafe08a9 100644 --- a/.github/workflows/aarch64-linux-gnu-shared.yaml +++ b/.github/workflows/aarch64-linux-gnu-shared.yaml @@ -142,7 +142,7 @@ jobs: shell: bash run: | export PATH=$GITHUB_WORKSPACE/toolchain/bin:$PATH - aarch64-none-linux-gnu-strip --version + aarch64-linux-gnu-strip --version SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) @@ -157,7 +157,7 @@ jobs: ls -lh $dst/bin/ echo "strip" - aarch64-none-linux-gnu-strip $dst/bin/* + 
aarch64-linux-gnu-strip $dst/bin/* tree $dst diff --git a/.github/workflows/build-wheels-aarch64.yaml b/.github/workflows/build-wheels-aarch64.yaml index c75a32243..d88947c9e 100644 --- a/.github/workflows/build-wheels-aarch64.yaml +++ b/.github/workflows/build-wheels-aarch64.yaml @@ -17,12 +17,13 @@ concurrency: jobs: build_wheels_aarch64: - name: Build wheels for aarch64 on ${{ matrix.os }} + name: ${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] steps: - uses: actions/checkout@v4 @@ -35,9 +36,9 @@ jobs: # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ # for a list of versions - name: Build wheels - uses: pypa/cibuildwheel@v2.11.4 + uses: pypa/cibuildwheel@v2.15.0 env: - CIBW_BEFORE_BUILD: "pip install -U cmake numpy" + CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" CIBW_BUILD_VERBOSITY: 3 CIBW_ARCHS_LINUX: aarch64 diff --git a/.github/workflows/build-wheels-armv7l.yaml b/.github/workflows/build-wheels-armv7l.yaml index 910d04db9..07a72b969 100644 --- a/.github/workflows/build-wheels-armv7l.yaml +++ b/.github/workflows/build-wheels-armv7l.yaml @@ -45,6 +45,7 @@ jobs: echo PYTHON_VERSION=$PYTHON_VERSION >> $GITHUB_ENV # https://github.com/mshr-h/onnx-dockerfile-for-raspberry-pi/blob/main/3.10-bullseye-build/Dockerfile.arm32v7 + # https://hub.docker.com/r/balenalib/raspberrypi3-python - name: Run docker uses: addnab/docker-run-action@v3 with: diff --git a/.github/workflows/build-wheels.yaml b/.github/workflows/build-wheels-linux.yaml similarity index 61% rename from .github/workflows/build-wheels.yaml rename to .github/workflows/build-wheels-linux.yaml index 0384c3271..c1c51666b 100644 --- a/.github/workflows/build-wheels.yaml +++ b/.github/workflows/build-wheels-linux.yaml @@ -1,4 +1,4 @@ -name: build-wheels +name: build-wheels-linux on: push: @@ -16,13 +16,14 @@ concurrency: cancel-in-progress: true jobs: - build_wheels: - name: Build wheels on ${{ matrix.os }} + build_wheels_linux: + name: ${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] steps: - uses: actions/checkout@v4 @@ -30,9 +31,9 @@ jobs: # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ # for a list of versions - name: Build wheels - uses: pypa/cibuildwheel@v2.11.4 + uses: pypa/cibuildwheel@v2.15.0 env: - CIBW_BEFORE_BUILD: "pip install -U cmake numpy" + CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" CIBW_BUILD_VERBOSITY: 3 CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib' @@ -65,6 +66,35 @@ jobs: with: path: ./wheelhouse/*.whl + - name: Publish to huggingface + if: matrix.python-version == 'cp38' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../wheelhouse/*.whl . 
+ + git status + git add . + git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + - name: Publish wheels to PyPI env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} @@ -76,14 +106,14 @@ jobs: twine upload ./wheelhouse/*.whl - name: Build sdist - if: ${{ matrix.os == 'ubuntu-latest' }} + if: matrix.python-version == 'cp38' shell: bash run: | python3 setup.py sdist ls -l dist/* - name: Publish sdist to PyPI - if: ${{ matrix.os == 'ubuntu-latest' }} + if: matrix.python-version == 'cp38' env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/build-wheels-macos-arm64.yaml b/.github/workflows/build-wheels-macos-arm64.yaml new file mode 100644 index 000000000..a58a26b0d --- /dev/null +++ b/.github/workflows/build-wheels-macos-arm64.yaml @@ -0,0 +1,104 @@ +name: build-wheels-macos-arm64 + +on: + push: + branches: + - wheel + tags: + - '*' + workflow_dispatch: + +env: + SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1 + +concurrency: + group: build-wheels-macos-arm64-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_wheels: + name: ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + + steps: + - uses: actions/checkout@v4 + + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ + # for a list of versions + - name: Build wheels + if: matrix.python-version == 'cp37' + uses: pypa/cibuildwheel@v2.11.4 + env: + CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'" + CIBW_ARCHS: "arm64" + CIBW_BUILD_VERBOSITY: 3 + + # Don't repair macOS wheels + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" + + - name: Build wheels + if: matrix.python-version != 'cp37' + uses: pypa/cibuildwheel@v2.15.0 + env: + CIBW_BUILD: "${{ matrix.python-version}}-* " + CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'" + CIBW_ARCHS: "arm64" + CIBW_BUILD_VERBOSITY: 3 + + # Don't repair macOS wheels + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" + + - name: Display wheels + shell: bash + run: | + ls -lh ./wheelhouse/ + + ls -lh ./wheelhouse/*.whl + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + - name: Publish to huggingface + if: matrix.python-version == 'cp38' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../wheelhouse/*.whl . + + git status + git add . 
+ git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + + - name: Publish wheels to PyPI + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python3 -m pip install --upgrade pip + python3 -m pip install wheel twine setuptools + + twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-macos-x64.yaml b/.github/workflows/build-wheels-macos-x64.yaml new file mode 100644 index 000000000..a15d84cee --- /dev/null +++ b/.github/workflows/build-wheels-macos-x64.yaml @@ -0,0 +1,104 @@ +name: build-wheels-macos-x64 + +on: + push: + branches: + - wheel + tags: + - '*' + workflow_dispatch: + +env: + SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1 + +concurrency: + group: build-wheels-macos-x64-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_wheels: + name: ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + + steps: + - uses: actions/checkout@v4 + + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ + # for a list of versions + - name: Build wheels + if: matrix.python-version == 'cp37' + uses: pypa/cibuildwheel@v2.11.4 + env: + CIBW_BUILD: "${{ matrix.python-version}}-* " + CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='x86_64'" + CIBW_ARCHS: "x86_64" + CIBW_BUILD_VERBOSITY: 3 + + # Don't repair macOS wheels + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" + - name: Build wheels + if: matrix.python-version != 'cp37' + uses: pypa/cibuildwheel@v2.15.0 + env: + CIBW_BUILD: "${{ matrix.python-version}}-* " + CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='x86_64'" + CIBW_ARCHS: "x86_64" + CIBW_BUILD_VERBOSITY: 3 + + # Don't repair macOS wheels + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" + + - name: Display wheels + shell: bash + run: | + ls -lh ./wheelhouse/ + + ls -lh ./wheelhouse/*.whl + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + - name: Publish to huggingface + if: matrix.python-version == 'cp38' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../wheelhouse/*.whl . + + git status + git add . 
+ git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + + - name: Publish wheels to PyPI + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python3 -m pip install --upgrade pip + python3 -m pip install wheel twine setuptools + + twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-macos.yaml b/.github/workflows/build-wheels-macos.yaml deleted file mode 100644 index 56d3d2e5f..000000000 --- a/.github/workflows/build-wheels-macos.yaml +++ /dev/null @@ -1,62 +0,0 @@ -name: build-wheels-macos - -on: - push: - branches: - - wheel - tags: - - '*' - workflow_dispatch: - -env: - SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1 - -concurrency: - group: build-wheels-macos-${{ github.ref }} - cancel-in-progress: true - -jobs: - build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [macos-latest] - - steps: - - uses: actions/checkout@v4 - - # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ - # for a list of versions - - name: Build wheels - uses: pypa/cibuildwheel@v2.11.4 - env: - CIBW_BEFORE_BUILD: "pip install -U cmake numpy" - CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64'" - CIBW_ARCHS: "universal2" - CIBW_BUILD_VERBOSITY: 3 - - # Don't repair macOS wheels - CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" - - - name: Display wheels - shell: bash - run: | - ls -lh ./wheelhouse/ - - ls -lh ./wheelhouse/*.whl - - - uses: actions/upload-artifact@v3 - with: - path: ./wheelhouse/*.whl - - - name: Publish wheels to PyPI - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools - - twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/build-wheels-win32.yaml b/.github/workflows/build-wheels-win32.yaml index f17b771de..9452ea372 100644 --- a/.github/workflows/build-wheels-win32.yaml +++ b/.github/workflows/build-wheels-win32.yaml @@ -17,12 +17,13 @@ concurrency: jobs: build_wheels_win32: - name: Build wheels on ${{ matrix.os }} + name: ${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [windows-latest] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] steps: - uses: actions/checkout@v4 @@ -30,10 +31,10 @@ jobs: # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ # for a list of versions - name: Build wheels - uses: pypa/cibuildwheel@v2.11.4 + uses: pypa/cibuildwheel@v2.15.0 env: - CIBW_BEFORE_BUILD: "pip install -U cmake numpy" CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-A Win32" + CIBW_BUILD: "${{ matrix.python-version}}-* " CIBW_SKIP: "*-win_amd64" CIBW_BUILD_VERBOSITY: 3 @@ -48,6 +49,35 @@ jobs: with: path: ./wheelhouse/*.whl + - name: Publish to huggingface + if: matrix.python-version == 'cp38' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../wheelhouse/*.whl . + + git status + git add . 
+ git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + - name: Publish wheels to PyPI env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} diff --git a/.github/workflows/build-wheels-win64.yaml b/.github/workflows/build-wheels-win64.yaml new file mode 100644 index 000000000..fe711953d --- /dev/null +++ b/.github/workflows/build-wheels-win64.yaml @@ -0,0 +1,117 @@ +name: build-wheels-win64 + +on: + push: + branches: + - wheel + tags: + - '*' + workflow_dispatch: + +env: + SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1 + +concurrency: + group: build-wheels-win64-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_wheels_win64: + name: ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [windows-latest] + python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] + + steps: + - uses: actions/checkout@v4 + + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ + # for a list of versions + - name: Build wheels + uses: pypa/cibuildwheel@v2.15.0 + env: + CIBW_BUILD: "${{ matrix.python-version}}-* " + CIBW_SKIP: "cp27-* cp35-* *-win32 pp* *-musllinux*" + CIBW_BUILD_VERBOSITY: 3 + + - name: Display wheels + shell: bash + run: | + ls -lh ./wheelhouse/ + + ls -lh ./wheelhouse/*.whl + + - uses: actions/upload-artifact@v3 + with: + path: ./wheelhouse/*.whl + + - name: Publish to huggingface + if: matrix.python-version == 'cp38' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../wheelhouse/*.whl . + + git status + git add . + git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + + - name: Publish to huggingface + if: matrix.python-version == 'cp38' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v2 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + cp -v ../wheelhouse/*.whl . + + git status + git add . 
+ git commit -m "add more wheels" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main + + - name: Publish wheels to PyPI + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python3 -m pip install --upgrade pip + python3 -m pip install wheel twine setuptools + + twine upload ./wheelhouse/*.whl diff --git a/.github/workflows/dot-net.yaml b/.github/workflows/dot-net.yaml index afea61321..12bbc0150 100644 --- a/.github/workflows/dot-net.yaml +++ b/.github/workflows/dot-net.yaml @@ -7,20 +7,6 @@ on: - fix-dot-net tags: - '*' - release: - types: - - published - - workflow_dispatch: - inputs: - release: - description: "Whether to release" - type: boolean - -env: - RELEASE: - |- # Release if there is a release tag name or a release flag in workflow_dispatch - ${{ github.event.release.tag_name != '' || github.event.inputs.release == 'true' }} workflow_dispatch: diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index e6af07a81..98f633584 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -1,10 +1,6 @@ name: npm on: - push: - branches: - - master - workflow_dispatch: concurrency: diff --git a/.github/workflows/test-nodejs-npm.yaml b/.github/workflows/test-nodejs-npm.yaml new file mode 100644 index 000000000..4905d30d2 --- /dev/null +++ b/.github/workflows/test-nodejs-npm.yaml @@ -0,0 +1,59 @@ +name: test-nodejs-npm + +on: + workflow_dispatch: + + schedule: + # minute (0-59) + # hour (0-23) + # day of the month (1-31) + # month (1-12) + # day of the week (0-6) + # nightly build at 23:50 UTC time every day + - cron: "50 23 * * *" + +concurrency: + group: test-nodejs-npm-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test-nodejs-npm: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] #, windows-latest] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/setup-node@v3 + with: + node-version: 13 + registry-url: 'https://registry.npmjs.org' + + - name: Display node version + shell: bash + run: | + node --version + npm --version + + - name: Run tests + shell: bash + run: | + node --version + npm --version + + export d=nodejs-examples + ./.github/scripts/test-nodejs-npm.sh diff --git a/.github/workflows/test-nodejs.yaml b/.github/workflows/test-nodejs.yaml new file mode 100644 index 000000000..4f9ecc56a --- /dev/null +++ b/.github/workflows/test-nodejs.yaml @@ -0,0 +1,108 @@ +name: test-nodejs + +on: + push: + branches: + - master + + pull_request: + branches: + - master + + workflow_dispatch: + +concurrency: + group: test-nodejs-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test-nodejs: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] #, windows-latest] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-Release-ON + + - name: Configure CMake + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + cmake -D 
CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install .. + make -j2 + make install + ls -lh install/lib + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Copy files + shell: bash + run: | + os=${{ matrix.os }} + if [[ $os == 'ubuntu-latest' ]]; then + mkdir -p scripts/nodejs/lib/linux-x64 + dst=scripts/nodejs/lib/linux-x64 + elif [[ $os == 'macos-latest' ]]; then + mkdir -p scripts/nodejs/lib/osx-x64 + dst=scripts/nodejs/lib/osx-x64 + fi + cp -v build/install/lib/* $dst/ + + - name: replace files + shell: bash + run: | + cd nodejs-examples + files=$(ls -1 *.js) + for f in ${files[@]}; do + echo $f + sed -i.bak s%\'sherpa-onnx\'%\'./index.js\'% $f + git status + done + git diff + cp *.js ../scripts/nodejs + + - uses: actions/setup-node@v3 + with: + node-version: 13 + registry-url: 'https://registry.npmjs.org' + + - name: Display node version + shell: bash + run: | + node --version + npm --version + + - name: Run tests + shell: bash + run: | + node --version + npm --version + export d=scripts/nodejs + + pushd $d + npm install + npm install wav + popd + + ./.github/scripts/test-nodejs-npm.sh diff --git a/nodejs-examples/.gitignore b/nodejs-examples/.gitignore new file mode 100644 index 000000000..d5f19d89b --- /dev/null +++ b/nodejs-examples/.gitignore @@ -0,0 +1,2 @@ +node_modules +package-lock.json diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md new file mode 100644 index 000000000..1ee665e31 --- /dev/null +++ b/nodejs-examples/README.md @@ -0,0 +1,247 @@ +# Introduction + +This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). + +Before you continue, please first install the npm package `sherpa-onnx` by + +```bash +npm install sherpa-onnx +``` + +In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) +for text-to-speech and speech-to-text. + +**Caution**: If you get the following error: +``` +/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67 + if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) { + ^ + +TypeError: Cannot read properties of null (reading 'match') + at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21) + at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10) + at Object. (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28) + at Module._compile (node:internal/modules/cjs/loader:1376:14) + at Module._extensions..js (node:internal/modules/cjs/loader:1435:10) + at Module.load (node:internal/modules/cjs/loader:1207:32) + at Module._load (node:internal/modules/cjs/loader:1023:12) + at Module.require (node:internal/modules/cjs/loader:1235:19) + at require (node:internal/modules/helpers:176:18) + at Object. (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21) +``` + +Please downgrade your node to version v13.14.0. See also +https://github.com/node-ffi-napi/node-ffi-napi/issues/244 +and +https://github.com/node-ffi-napi/node-ffi-napi/issues/97 . + +# Text-to-speech + +In the following, we demonstrate how to run text-to-speech. 
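Both TTS examples below use the same small API surface. As a quick orientation, here is a condensed sketch of that flow, taken from `./test-offline-tts-en.js` added in this PR; it assumes the `sherpa-onnx` npm package is installed and that the `vits-vctk` model directory from the next section has already been downloaded and extracted:

```js
// Condensed from ./test-offline-tts-en.js in this PR; the paths assume the
// vits-vctk model has been downloaded and extracted into ./vits-vctk.
const sherpa_onnx = require('sherpa-onnx');

const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
vits.model = './vits-vctk/vits-vctk.onnx';
vits.lexicon = './vits-vctk/lexicon.txt';
vits.tokens = './vits-vctk/tokens.txt';

const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
modelConfig.vits = vits;

const config = new sherpa_onnx.OfflineTtsConfig();
config.model = modelConfig;

const tts = new sherpa_onnx.OfflineTts(config);
const audio = tts.generate(
    'Good morning. How are you doing?', 99 /* speakerId */, 1.0 /* speed */);
audio.save('./test-en.wav');
tts.free();
```

The Chinese example differs only in the model paths and in additionally setting `config.ruleFsts` (see `./test-offline-tts-zh.js`).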
+ +## ./test-offline-tts-en.js + +[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use +a VITS pretrained model +[VCTK](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers) +for text-to-speech. + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2 +tar xvf vits-vctk.tar.bz2 +node ./test-offline-tts-en.js +``` + +## ./test-offline-tts-zh.js + +[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use +a VITS pretrained model +[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3) +for text-to-speech. + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 +tar xvf vits-zh-aishell3.tar.bz2 +node ./test-offline-tts-zh.js +``` + +# Speech-to-text + +In the following, we demonstrate how to decode files and how to perform +speech recognition with a microphone using `nodejs`. We need to install two additional +npm packages: + + +```bash +npm install wav naudiodon2 +``` + +## ./test-offline-nemo-ctc.js + +[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates +how to decode a file with a NeMo CTC model. In the code we use +[stt_en_conformer_ctc_small](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-small). + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 +tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 +node ./test-offline-nemo-ctc.js +``` + +## ./test-offline-paraformer.js + +[./test-offline-paraformer.js](./test-offline-paraformer.js) demonstrates +how to decode a file with a non-streaming Paraformer model. In the code we use +[sherpa-onnx-paraformer-zh-2023-03-28](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese). + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +node ./test-offline-paraformer.js +``` + +## ./test-offline-transducer.js + +[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates +how to decode a file with a non-streaming transducer model. In the code we use +[sherpa-onnx-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english). + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +node ./test-offline-transducer.js +``` + +## ./test-offline-whisper.js +[./test-offline-whisper.js](./test-offline-whisper.js) demonstrates +how to decode a file with a Whisper model. In the code we use +[sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).
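All four file-decoding examples above follow the same pattern: build a model-specific config, wrap it in an `OfflineRecognizerConfig`, create a stream, feed it a `Float32Array` of 16 kHz samples, and decode. A minimal sketch of that flow, using the Whisper configuration from `./test-offline-whisper.js` in this PR (the audio below is a dummy buffer; the real examples read it from a WAV file with the `wav` package):

```js
const sherpa_onnx = require('sherpa-onnx');

const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;

const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';

const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.whisper = whisper;
modelConfig.tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
modelConfig.modelType = 'whisper';

const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';

const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
const stream = recognizer.createStream();

// One second of silence as a placeholder; the full example decodes
// ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav instead.
const samples = new Float32Array(featConfig.sampleRate);
stream.acceptWaveform(featConfig.sampleRate, samples);
recognizer.decode(stream);
// The full example then reads the recognized text from the recognizer
// before releasing resources.
recognizer.free();
```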
+ +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 +node ./test-offline-whisper.js +``` + +## ./test-online-paraformer-microphone.js +[./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js) +demonstrates how to do real-time speech recognition from a microphone +with a streaming Paraformer model. In the code we use +[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english). + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +node ./test-online-paraformer-microphone.js +``` + +## ./test-online-paraformer.js +[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates +how to decode a file using a streaming Paraformer model. In the code we use +[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english). + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 +node ./test-online-paraformer.js +``` + +## ./test-online-transducer-microphone.js +[./test-online-transducer-microphone.js](./test-online-transducer-microphone.js) +demonstrates how to do real-time speech recognition with a microphone using a streaming transducer model. In the code +we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english). + + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +node ./test-online-transducer-microphone.js +``` + +## ./test-online-transducer.js +[./test-online-transducer.js](./test-online-transducer.js) demonstrates +how to decode a file using a streaming transducer model. In the code +we use [sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english).
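The streaming examples all share the same decode loop: feed a chunk of samples, then call `decode` while the recognizer reports it is ready. A condensed sketch of that loop, using the streaming Paraformer configuration from `./test-online-paraformer-microphone.js` in this PR (the transducer examples differ only in the model config; the chunk below is a dummy buffer standing in for microphone or WAV data):

```js
const sherpa_onnx = require('sherpa-onnx');

const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;

const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
paraformer.encoder =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
paraformer.decoder =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';

const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.paraformer = paraformer;
modelConfig.tokens =
    './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
modelConfig.modelType = 'paraformer';

const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
recognizerConfig.enableEndpoint = 1;

const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
const stream = recognizer.createStream();

// 100 ms of silence per chunk as a placeholder; the real examples feed
// microphone buffers (naudiodon2) or WAV chunks here, in a loop.
const chunk = new Float32Array(featConfig.sampleRate / 10);
stream.acceptWaveform(featConfig.sampleRate, chunk);

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
console.log(recognizer.getResult(stream).text);

stream.free();
recognizer.free();
```

The microphone examples additionally check `recognizer.isEndpoint(stream)` after each chunk to detect segment boundaries.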
+ +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +node ./test-online-transducer.js +``` + +## ./test-vad-microphone-offline-paraformer.js + +[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js) +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) +with a non-streaming Paraformer model for speech recognition from a microphone. + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 +node ./test-vad-microphone-offline-paraformer.js +``` + +## ./test-vad-microphone-offline-transducer.js + +[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js) +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) +with a non-streaming transducer model for speech recognition from a microphone. + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2 +node ./test-vad-microphone-offline-transducer.js +``` + +## ./test-vad-microphone-offline-whisper.js + +[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js) +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) +with a Whisper model for speech recognition from a microphone. + +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 +tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 +node ./test-vad-microphone-offline-whisper.js +``` + +## ./test-vad-microphone.js + +[./test-vad-microphone.js](./test-vad-microphone.js) +demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad).
+ +You can use the following command to run it: + +```bash +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +node ./test-vad-microphone.js +``` diff --git a/nodejs-examples/package.json b/nodejs-examples/package.json new file mode 100644 index 000000000..278762641 --- /dev/null +++ b/nodejs-examples/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "naudiodon2": "^2.4.0", + "sherpa-onnx": "^1.8.11", + "wav": "^1.0.2" + } +} diff --git a/nodejs-examples/test-offline-nemo-ctc.js b/nodejs-examples/test-offline-nemo-ctc.js index 2e76940f8..1cef7169b 100644 --- a/nodejs-examples/test-offline-nemo-ctc.js +++ b/nodejs-examples/test-offline-nemo-ctc.js @@ -1,32 +1,32 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const fs = require("fs"); -const {Readable} = require("stream"); -const wav = require("wav"); +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig(); - nemoCtc.model = "./sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx"; - let tokens = "./sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt"; + const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig(); + nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx'; + const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new sherpa_onnx.OfflineModelConfig(); modelConfig.nemoCtc = nemoCtc; modelConfig.tokens = tokens; - modelConfig.modelType = "nemo_ctc"; + modelConfig.modelType = 'nemo_ctc'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer; } @@ -34,13 +34,13 @@ recognizer = createRecognizer(); stream = recognizer.createStream(); const waveFilename = - "./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav"; + './sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); -let buf = []; +const buf = []; -reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { if (sampleRate != recognizer.config.featConfig.sampleRate) { throw new Error(`Only support sampleRate ${ recognizer.config.featConfig.sampleRate}.
Given ${sampleRate}`); @@ -59,15 +59,16 @@ reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { } }); -fs.createReadStream(waveFilename, {highWaterMark : 4096}) +fs.createReadStream(waveFilename, {highWaterMark: 4096}) .pipe(reader) - .on("finish", function(err) { + .on('finish', function(err) { // tail padding const floatSamples = new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); buf.push(floatSamples); - let flattened = Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); + const flattened = + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); recognizer.decode(stream); @@ -78,14 +79,14 @@ fs.createReadStream(waveFilename, {highWaterMark : 4096}) recognizer.free(); }); -readable.on("readable", function() { +readable.on('readable', function() { let chunk; while ((chunk = readable.read()) != null) { - const int16Samples = - new Int16Array(chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); - let floatSamples = new Float32Array(int16Samples.length); + const floatSamples = new Float32Array(int16Samples.length); for (let i = 0; i < floatSamples.length; i++) { floatSamples[i] = int16Samples[i] / 32768.0; diff --git a/nodejs-examples/test-offline-paraformer.js b/nodejs-examples/test-offline-paraformer.js index 7e9654da5..c96977b40 100644 --- a/nodejs-examples/test-offline-paraformer.js +++ b/nodejs-examples/test-offline-paraformer.js @@ -1,45 +1,45 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -const fs = require("fs"); -const {Readable} = require("stream"); -const wav = require("wav"); +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); - paraformer.model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx"; - let tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt"; + const paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); + paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx'; + const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new sherpa_onnx.OfflineModelConfig(); modelConfig.paraformer = paraformer; modelConfig.tokens = tokens; - modelConfig.modelType = "paraformer"; + modelConfig.modelType = 'paraformer'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer; } recognizer = createRecognizer(); stream = recognizer.createStream(); -const waveFilename = "./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav"; +const 
waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); -let buf = []; +const buf = []; -reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { if (sampleRate != recognizer.config.featConfig.sampleRate) { throw new Error(`Only support sampleRate ${ recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`); @@ -58,15 +58,16 @@ reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { } }); -fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) .pipe(reader) - .on("finish", function(err) { + .on('finish', function(err) { // tail padding const floatSamples = new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); buf.push(floatSamples); - let flattened = Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); + const flattened = + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); recognizer.decode(stream); @@ -77,14 +78,14 @@ fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) recognizer.free(); }); -readable.on("readable", function() { +readable.on('readable', function() { let chunk; while ((chunk = readable.read()) != null) { - const int16Samples = - new Int16Array(chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); - let floatSamples = new Float32Array(int16Samples.length); + const floatSamples = new Float32Array(int16Samples.length); for (let i = 0; i < floatSamples.length; i++) { floatSamples[i] = int16Samples[i] / 32768.0; } diff --git a/nodejs-examples/test-offline-transducer.js b/nodejs-examples/test-offline-transducer.js index cb685201b..d86cb67b6 100644 --- a/nodejs-examples/test-offline-transducer.js +++ b/nodejs-examples/test-offline-transducer.js @@ -1,50 +1,50 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const fs = require("fs"); -const {Readable} = require("stream"); -const wav = require("wav"); +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let transducer = new sherpa_onnx.OfflineTransducerModelConfig(); + const transducer = new sherpa_onnx.OfflineTransducerModelConfig(); transducer.encoder = - "./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx"; + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx'; transducer.decoder = - "./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx"; + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx'; transducer.joiner = - "./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx"; - let tokens = "./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt"; + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx'; + const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new 
sherpa_onnx.OfflineModelConfig(); modelConfig.transducer = transducer; modelConfig.tokens = tokens; - modelConfig.modelType = "transducer"; + modelConfig.modelType = 'transducer'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer; } recognizer = createRecognizer(); stream = recognizer.createStream(); -const waveFilename = "./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav"; +const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); -let buf = []; +const buf = []; -reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { if (sampleRate != recognizer.config.featConfig.sampleRate) { throw new Error(`Only support sampleRate ${ recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`); @@ -63,15 +63,16 @@ reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { } }); -fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) .pipe(reader) - .on("finish", function(err) { + .on('finish', function(err) { // tail padding const floatSamples = new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); buf.push(floatSamples); - let flattened = Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); + const flattened = + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); recognizer.decode(stream); @@ -82,14 +83,14 @@ fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) recognizer.free(); }); -readable.on("readable", function() { +readable.on('readable', function() { let chunk; while ((chunk = readable.read()) != null) { - const int16Samples = - new Int16Array(chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); - let floatSamples = new Float32Array(int16Samples.length); + const floatSamples = new Float32Array(int16Samples.length); for (let i = 0; i < floatSamples.length; i++) { floatSamples[i] = int16Samples[i] / 32768.0; } diff --git a/nodejs-examples/test-offline-tts-en.js b/nodejs-examples/test-offline-tts-en.js index ff8a84adf..e44e1a55c 100644 --- a/nodejs-examples/test-offline-tts-en.js +++ b/nodejs-examples/test-offline-tts-en.js @@ -1,26 +1,27 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createOfflineTts() { - let vits = new sherpa_onnx.OfflineTtsVitsModelConfig(); - vits.model = "./vits-vctk/vits-vctk.onnx"; - vits.lexicon = "./vits-vctk/lexicon.txt"; - vits.tokens = "./vits-vctk/tokens.txt"; + const vits = new sherpa_onnx.OfflineTtsVitsModelConfig(); + vits.model = './vits-vctk/vits-vctk.onnx'; + vits.lexicon = './vits-vctk/lexicon.txt'; + vits.tokens = './vits-vctk/tokens.txt'; - let modelConfig = new 
sherpa_onnx.OfflineTtsModelConfig(); + const modelConfig = new sherpa_onnx.OfflineTtsModelConfig(); modelConfig.vits = vits; - let config = new sherpa_onnx.OfflineTtsConfig(); + const config = new sherpa_onnx.OfflineTtsConfig(); config.model = modelConfig; return new sherpa_onnx.OfflineTts(config); } -let tts = createOfflineTts(); -let speakerId = 99; -let speed = 1.0; -let audio = tts.generate("Good morning. How are you doing?", speakerId, speed); -audio.save("./test-en.wav"); -console.log("Saved to test-en.wav successfully."); +const tts = createOfflineTts(); +const speakerId = 99; +const speed = 1.0; +const audio = + tts.generate('Good morning. How are you doing?', speakerId, speed); +audio.save('./test-en.wav'); +console.log('Saved to test-en.wav successfully.'); tts.free(); diff --git a/nodejs-examples/test-offline-tts-zh.js b/nodejs-examples/test-offline-tts-zh.js index 809d96903..16555c82b 100644 --- a/nodejs-examples/test-offline-tts-zh.js +++ b/nodejs-examples/test-offline-tts-zh.js @@ -1,27 +1,27 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createOfflineTts() { - let vits = new sherpa_onnx.OfflineTtsVitsModelConfig(); - vits.model = "./vits-zh-aishell3/vits-aishell3.onnx"; - vits.lexicon = "./vits-zh-aishell3/lexicon.txt"; - vits.tokens = "./vits-zh-aishell3/tokens.txt"; + const vits = new sherpa_onnx.OfflineTtsVitsModelConfig(); + vits.model = './vits-zh-aishell3/vits-aishell3.onnx'; + vits.lexicon = './vits-zh-aishell3/lexicon.txt'; + vits.tokens = './vits-zh-aishell3/tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineTtsModelConfig(); + const modelConfig = new sherpa_onnx.OfflineTtsModelConfig(); modelConfig.vits = vits; - let config = new sherpa_onnx.OfflineTtsConfig(); + const config = new sherpa_onnx.OfflineTtsConfig(); config.model = modelConfig; - config.ruleFsts = "./vits-zh-aishell3/rule.fst"; + config.ruleFsts = './vits-zh-aishell3/rule.fst'; return new sherpa_onnx.OfflineTts(config); } -let tts = createOfflineTts(); -let speakerId = 66; -let speed = 1.0; -let audio = tts.generate("3年前中国总人口是1411778724人", speakerId, speed); -audio.save("./test-zh.wav"); -console.log("Saved to test-zh.wav successfully."); +const tts = createOfflineTts(); +const speakerId = 66; +const speed = 1.0; +const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed); +audio.save('./test-zh.wav'); +console.log('Saved to test-zh.wav successfully.'); tts.free(); diff --git a/nodejs-examples/test-offline-whisper.js b/nodejs-examples/test-offline-whisper.js index a4e62d6c6..1dd320bdf 100644 --- a/nodejs-examples/test-offline-whisper.js +++ b/nodejs-examples/test-offline-whisper.js @@ -1,46 +1,46 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const fs = require("fs"); -const {Readable} = require("stream"); -const wav = require("wav"); +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let whisper = new sherpa_onnx.OfflineWhisperModelConfig(); - whisper.encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"; - whisper.decoder = 
"./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"; - let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"; + const whisper = new sherpa_onnx.OfflineWhisperModelConfig(); + whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; + whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; + const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new sherpa_onnx.OfflineModelConfig(); modelConfig.whisper = whisper; modelConfig.tokens = tokens; - modelConfig.modelType = "whisper"; + modelConfig.modelType = 'whisper'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer; } recognizer = createRecognizer(); stream = recognizer.createStream(); -const waveFilename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"; +const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); -let buf = []; +const buf = []; -reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { if (sampleRate != recognizer.config.featConfig.sampleRate) { throw new Error(`Only support sampleRate ${ recognizer.config.featConfig.sampleRate}. 
Given ${sampleRate}`); @@ -59,15 +59,16 @@ reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { } }); -fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) .pipe(reader) - .on("finish", function(err) { + .on('finish', function(err) { // tail padding const floatSamples = new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); buf.push(floatSamples); - let flattened = Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); + const flattened = + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened); recognizer.decode(stream); @@ -78,14 +79,14 @@ fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) recognizer.free(); }); -readable.on("readable", function() { +readable.on('readable', function() { let chunk; while ((chunk = readable.read()) != null) { - const int16Samples = - new Int16Array(chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); - let floatSamples = new Float32Array(int16Samples.length); + const floatSamples = new Float32Array(int16Samples.length); for (let i = 0; i < floatSamples.length; i++) { floatSamples[i] = int16Samples[i] / 32768.0; diff --git a/nodejs-examples/test-online-paraformer-microphone.js b/nodejs-examples/test-online-paraformer-microphone.js index cc27ad34b..60b28f6f9 100644 --- a/nodejs-examples/test-online-paraformer-microphone.js +++ b/nodejs-examples/test-online-paraformer-microphone.js @@ -1,34 +1,35 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const portAudio = require("naudiodon2"); +const portAudio = require('naudiodon2'); console.log(portAudio.getDevices()); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; - let paraformer = new sherpa_onnx.OnlineParaformerModelConfig(); + const paraformer = new sherpa_onnx.OnlineParaformerModelConfig(); paraformer.encoder = - "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx'; paraformer.decoder = - "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; - let tokens = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx'; + const tokens = + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'; - let modelConfig = new sherpa_onnx.OnlineModelConfig(); + const modelConfig = new sherpa_onnx.OnlineModelConfig(); modelConfig.paraformer = paraformer; modelConfig.tokens = tokens; - modelConfig.modelType = "paraformer"; + modelConfig.modelType = 'paraformer'; - let recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; recognizerConfig.enableEndpoint = 1; - let recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); + const recognizer = new 
sherpa_onnx.OnlineRecognizer(recognizerConfig); return recognizer; } recognizer = createRecognizer(); @@ -36,31 +37,31 @@ stream = recognizer.createStream(); display = new sherpa_onnx.Display(50); -let lastText = ""; +let lastText = ''; let segmentIndex = 0; -let ai = new portAudio.AudioIO({ - inOptions : { - channelCount : 1, - closeOnError : true, // Close the stream if an audio error is detected, if +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if // set false then just log the error - deviceId : -1, // Use -1 or omit the deviceId to select the default device - sampleFormat : portAudio.SampleFormatFloat32, - sampleRate : featConfig.sampleRate + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: recognizer.config.featConfig.sampleRate } }); -ai.on("data", data => { - let samples = new Float32Array(data.buffer); +ai.on('data', data => { + const samples = new Float32Array(data.buffer); - stream.acceptWaveform(recognizerConfig.featConfig.sampleRate, samples); + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples); while (recognizer.isReady(stream)) { recognizer.decode(stream); } - let isEndpoint = recognizer.isEndpoint(stream); - let text = recognizer.getResult(stream).text; + const isEndpoint = recognizer.isEndpoint(stream); + const text = recognizer.getResult(stream).text; if (text.length > 0 && lastText != text) { lastText = text; @@ -75,11 +76,11 @@ ai.on("data", data => { } }); -ai.on("close", () => { - console.log("Free resources"); +ai.on('close', () => { + console.log('Free resources'); stream.free(); recognizer.free(); }); ai.start(); -console.log("Started! Please speak") +console.log('Started! 
Please speak') diff --git a/nodejs-examples/test-online-paraformer.js b/nodejs-examples/test-online-paraformer.js index d419911f1..e2b6a01b7 100644 --- a/nodejs-examples/test-online-paraformer.js +++ b/nodejs-examples/test-online-paraformer.js @@ -1,41 +1,42 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const fs = require("fs"); -const {Readable} = require("stream"); -const wav = require("wav"); +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; - let paraformer = new sherpa_onnx.OnlineParaformerModelConfig(); + const paraformer = new sherpa_onnx.OnlineParaformerModelConfig(); paraformer.encoder = - "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx'; paraformer.decoder = - "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; - let tokens = "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx'; + const tokens = + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'; - let modelConfig = new sherpa_onnx.OnlineModelConfig(); + const modelConfig = new sherpa_onnx.OnlineModelConfig(); modelConfig.paraformer = paraformer; modelConfig.tokens = tokens; - modelConfig.modelType = "paraformer"; + modelConfig.modelType = 'paraformer'; - let recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); return recognizer; } recognizer = createRecognizer(); stream = recognizer.createStream(); const waveFilename = - "./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav"; + './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); @@ -50,7 +51,7 @@ function decode(samples) { console.log(r.text); } -reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { if (sampleRate != recognizer.config.featConfig.sampleRate) { throw new Error(`Only support sampleRate ${ recognizer.config.featConfig.sampleRate}. 
Given ${sampleRate}`); @@ -69,9 +70,9 @@ reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { } }); -fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) .pipe(reader) - .on("finish", function(err) { + .on('finish', function(err) { // tail padding const floatSamples = new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); @@ -80,14 +81,14 @@ fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) recognizer.free(); }); -readable.on("readable", function() { +readable.on('readable', function() { let chunk; while ((chunk = readable.read()) != null) { - const int16Samples = - new Int16Array(chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); - let floatSamples = new Float32Array(int16Samples.length); + const floatSamples = new Float32Array(int16Samples.length); for (let i = 0; i < floatSamples.length; i++) { floatSamples[i] = int16Samples[i] / 32768.0; diff --git a/nodejs-examples/test-online-transducer-microphone.js b/nodejs-examples/test-online-transducer-microphone.js index 81297d812..f16f10d76 100644 --- a/nodejs-examples/test-online-transducer-microphone.js +++ b/nodejs-examples/test-online-transducer-microphone.js @@ -1,60 +1,60 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const portAudio = require("naudiodon2"); +const portAudio = require('naudiodon2'); // console.log(portAudio.getDevices()); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let transducer = new sherpa_onnx.OnlineTransducerModelConfig(); + const transducer = new sherpa_onnx.OnlineTransducerModelConfig(); transducer.encoder = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; transducer.decoder = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; transducer.joiner = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"; - let tokens = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx'; + const tokens = + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; - let modelConfig = new sherpa_onnx.OnlineModelConfig(); + const modelConfig = new sherpa_onnx.OnlineModelConfig(); modelConfig.transducer = transducer; modelConfig.tokens = tokens; - modelConfig.modelType = "zipformer"; + modelConfig.modelType = 'zipformer'; - let recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; recognizerConfig.enableEndpoint = 1; - let recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); 
+ const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); return recognizer; } recognizer = createRecognizer(); stream = recognizer.createStream(); display = new sherpa_onnx.Display(50); -let lastText = ""; +let lastText = ''; let segmentIndex = 0; -let ai = new portAudio.AudioIO({ - inOptions : { - channelCount : 1, - closeOnError : true, // Close the stream if an audio error is detected, if +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if // set false then just log the error - deviceId : -1, // Use -1 or omit the deviceId to select the default device - sampleFormat : portAudio.SampleFormatFloat32, - sampleRate : recognizer.config.featConfig.sampleRate + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: recognizer.config.featConfig.sampleRate } }); -ai.on("data", data => { - let samples = new Float32Array(data.buffer); +ai.on('data', data => { + const samples = new Float32Array(data.buffer); stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples); @@ -62,8 +62,8 @@ ai.on("data", data => { recognizer.decode(stream); } - let isEndpoint = recognizer.isEndpoint(stream); - let text = recognizer.getResult(stream).text; + const isEndpoint = recognizer.isEndpoint(stream); + const text = recognizer.getResult(stream).text; if (text.length > 0 && lastText != text) { lastText = text; @@ -78,11 +78,11 @@ ai.on("data", data => { } }); -ai.on("close", () => { - console.log("Free resources"); +ai.on('close', () => { + console.log('Free resources'); stream.free(); recognizer.free(); }); ai.start(); -console.log("Started! Please speak") +console.log('Started! 
Please speak') diff --git a/nodejs-examples/test-online-transducer.js b/nodejs-examples/test-online-transducer.js index 60a961bff..822b97dae 100644 --- a/nodejs-examples/test-online-transducer.js +++ b/nodejs-examples/test-online-transducer.js @@ -1,36 +1,36 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const fs = require("fs"); -const {Readable} = require("stream"); -const wav = require("wav"); +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); -const sherpa_onnx = require("./index.js"); +const sherpa_onnx = require('sherpa-onnx'); function createRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let transducer = new sherpa_onnx.OnlineTransducerModelConfig(); + const transducer = new sherpa_onnx.OnlineTransducerModelConfig(); transducer.encoder = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; transducer.decoder = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; transducer.joiner = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"; - let tokens = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx'; + const tokens = + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; - let modelConfig = new sherpa_onnx.OnlineModelConfig(); + const modelConfig = new sherpa_onnx.OnlineModelConfig(); modelConfig.transducer = transducer; modelConfig.tokens = tokens; - modelConfig.modelType = "zipformer"; + modelConfig.modelType = 'zipformer'; - let recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig); return recognizer; @@ -39,7 +39,7 @@ recognizer = createRecognizer(); stream = recognizer.createStream(); const waveFilename = - "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav"; + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); @@ -54,7 +54,7 @@ function decode(samples) { console.log(r.text); } -reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { if (sampleRate != recognizer.config.featConfig.sampleRate) { throw new Error(`Only support sampleRate ${ recognizer.config.featConfig.sampleRate}. 
Given ${sampleRate}`); @@ -73,9 +73,9 @@ reader.on("format", ({audioFormat, bitDepth, channels, sampleRate}) => { } }); -fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) .pipe(reader) - .on("finish", function(err) { + .on('finish', function(err) { // tail padding const floatSamples = new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); @@ -84,14 +84,14 @@ fs.createReadStream(waveFilename, {"highWaterMark" : 4096}) recognizer.free(); }); -readable.on("readable", function() { +readable.on('readable', function() { let chunk; while ((chunk = readable.read()) != null) { - const int16Samples = - new Int16Array(chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); - let floatSamples = new Float32Array(int16Samples.length); + const floatSamples = new Float32Array(int16Samples.length); for (let i = 0; i < floatSamples.length; i++) { floatSamples[i] = int16Samples[i] / 32768.0; diff --git a/nodejs-examples/test-vad-microphone-offline-paraformer.js b/nodejs-examples/test-vad-microphone-offline-paraformer.js index 8a8ac0ae8..f5311bea4 100644 --- a/nodejs-examples/test-vad-microphone-offline-paraformer.js +++ b/nodejs-examples/test-vad-microphone-offline-paraformer.js @@ -1,87 +1,87 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const sherpa_onnx = require("./index.js"); -const portAudio = require("naudiodon2"); +const sherpa_onnx = require('sherpa-onnx3'); +const portAudio = require('naudiodon2'); console.log(portAudio.getDevices()); function createOfflineRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); - paraformer.model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx"; - let tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt"; + const paraformer = new sherpa_onnx.OfflineParaformerModelConfig(); + paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx'; + const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new sherpa_onnx.OfflineModelConfig(); modelConfig.paraformer = paraformer; modelConfig.tokens = tokens; - modelConfig.modelType = "paraformer"; + modelConfig.modelType = 'paraformer'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer } function createVad() { - let sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); - sileroVadModelConfig.model = "./silero_vad.onnx"; - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); + sileroVadModelConfig.model = './silero_vad.onnx'; + sileroVadModelConfig.minSpeechDuration = 
0.3; // seconds + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds sileroVadModelConfig.windowSize = 512; - let vadModelConfig = new sherpa_onnx.VadModelConfig(); + const vadModelConfig = new sherpa_onnx.VadModelConfig(); vadModelConfig.sileroVad = sileroVadModelConfig; vadModelConfig.sampleRate = 16000; - let bufferSizeInSeconds = 60; - let vad = new sherpa_onnx.VoiceActivityDetector(vadModelConfig, - bufferSizeInSeconds); + const bufferSizeInSeconds = 60; + const vad = new sherpa_onnx.VoiceActivityDetector( + vadModelConfig, bufferSizeInSeconds); return vad; } -let recognizer = createOfflineRecognizer(); -let vad = createVad(); +const recognizer = createOfflineRecognizer(); +const vad = createVad(); -let bufferSizeInSeconds = 30; -let buffer = +const bufferSizeInSeconds = 30; +const buffer = new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); var ai = new portAudio.AudioIO({ - inOptions : { - channelCount : 1, - sampleFormat : portAudio.SampleFormatFloat32, - sampleRate : vad.config.sampleRate, - deviceId : -1, // Use -1 or omit the deviceId to select the default device - closeOnError : true // Close the stream if an audio error is detected, if + inOptions: { + channelCount: 1, + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: vad.config.sampleRate, + deviceId: -1, // Use -1 or omit the deviceId to select the default device + closeOnError: true // Close the stream if an audio error is detected, if // set false then just log the error } }); let printed = false; let index = 0; -ai.on("data", data => { - let windowSize = vad.config.sileroVad.windowSize; +ai.on('data', data => { + const windowSize = vad.config.sileroVad.windowSize; buffer.push(new Float32Array(data.buffer)); while (buffer.size() > windowSize) { - let samples = buffer.get(buffer.head(), windowSize); + const samples = buffer.get(buffer.head(), windowSize); buffer.pop(windowSize); vad.acceptWaveform(samples) } while (!vad.isEmpty()) { - let segment = vad.front(); + const segment = vad.front(); vad.pop(); - let stream = recognizer.createStream(); - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, - segment.samples); + const stream = recognizer.createStream(); + stream.acceptWaveform( + recognizer.config.featConfig.sampleRate, segment.samples); recognizer.decode(stream); - let r = recognizer.getResult(stream); + const r = recognizer.getResult(stream); stream.free(); if (r.text.length > 0) { console.log(`${index}: ${r.text}`); @@ -90,12 +90,12 @@ ai.on("data", data => { } }); -ai.on("close", () => { - console.log("Free resources"); +ai.on('close', () => { + console.log('Free resources'); recognizer.free(); vad.free(); buffer.free(); }); ai.start(); -console.log("Started! Please speak") +console.log('Started! 
Please speak') diff --git a/nodejs-examples/test-vad-microphone-offline-transducer.js b/nodejs-examples/test-vad-microphone-offline-transducer.js index b2c080b3d..4cf6d7176 100644 --- a/nodejs-examples/test-vad-microphone-offline-transducer.js +++ b/nodejs-examples/test-vad-microphone-offline-transducer.js @@ -1,92 +1,92 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const sherpa_onnx = require("./index.js"); -const portAudio = require("naudiodon2"); +const sherpa_onnx = require('sherpa-onnx'); +const portAudio = require('naudiodon2'); console.log(portAudio.getDevices()); function createOfflineRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let transducer = new sherpa_onnx.OfflineTransducerModelConfig(); + const transducer = new sherpa_onnx.OfflineTransducerModelConfig(); transducer.encoder = - "./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx"; + './sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx'; transducer.decoder = - "./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx"; + './sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx'; transducer.joiner = - "./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx"; - let tokens = "./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt"; + './sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx'; + const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new sherpa_onnx.OfflineModelConfig(); modelConfig.transducer = transducer; modelConfig.tokens = tokens; - modelConfig.modelType = "transducer"; + modelConfig.modelType = 'transducer'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer; } function createVad() { - let sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); - sileroVadModelConfig.model = "./silero_vad.onnx"; - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); + sileroVadModelConfig.model = './silero_vad.onnx'; + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds sileroVadModelConfig.windowSize = 512; - let vadModelConfig = new sherpa_onnx.VadModelConfig(); + const vadModelConfig = new sherpa_onnx.VadModelConfig(); vadModelConfig.sileroVad = sileroVadModelConfig; vadModelConfig.sampleRate = 16000; - let bufferSizeInSeconds = 60; - let vad = new sherpa_onnx.VoiceActivityDetector(vadModelConfig, - bufferSizeInSeconds); + const bufferSizeInSeconds = 60; + const vad = new sherpa_onnx.VoiceActivityDetector( + vadModelConfig, bufferSizeInSeconds); return vad; } -let recognizer = createOfflineRecognizer(); -let vad = createVad(); +const recognizer = createOfflineRecognizer(); +const vad = createVad(); -let bufferSizeInSeconds = 30; -let buffer = +const 
bufferSizeInSeconds = 30; +const buffer = new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); -let ai = new portAudio.AudioIO({ - inOptions : { - channelCount : 1, - closeOnError : true, // Close the stream if an audio error is detected, if +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if // set false then just log the error - deviceId : -1, // Use -1 or omit the deviceId to select the default device - sampleFormat : portAudio.SampleFormatFloat32, - sampleRate : vad.config.sampleRate + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: vad.config.sampleRate } }); let printed = false; let index = 0; -ai.on("data", data => { - let windowSize = vad.config.sileroVad.windowSize; +ai.on('data', data => { + const windowSize = vad.config.sileroVad.windowSize; buffer.push(new Float32Array(data.buffer)); while (buffer.size() > windowSize) { - let samples = buffer.get(buffer.head(), windowSize); + const samples = buffer.get(buffer.head(), windowSize); buffer.pop(windowSize); vad.acceptWaveform(samples) } while (!vad.isEmpty()) { - let segment = vad.front(); + const segment = vad.front(); vad.pop(); - let stream = recognizer.createStream(); - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, - segment.samples); + const stream = recognizer.createStream(); + stream.acceptWaveform( + recognizer.config.featConfig.sampleRate, segment.samples); recognizer.decode(stream); - let r = recognizer.getResult(stream); + const r = recognizer.getResult(stream); stream.free(); if (r.text.length > 0) { console.log(`${index}: ${r.text}`); @@ -95,12 +95,12 @@ ai.on("data", data => { } }); -ai.on("close", () => { - console.log("Free resources"); +ai.on('close', () => { + console.log('Free resources'); recognizer.free(); vad.free(); buffer.free(); }); ai.start(); -console.log("Started! Please speak") +console.log('Started! 
Please speak') diff --git a/nodejs-examples/test-vad-microphone-offline-whisper.js b/nodejs-examples/test-vad-microphone-offline-whisper.js index 2877663b2..07a344b89 100644 --- a/nodejs-examples/test-vad-microphone-offline-whisper.js +++ b/nodejs-examples/test-vad-microphone-offline-whisper.js @@ -1,88 +1,88 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) // -const sherpa_onnx = require("./index.js"); -const portAudio = require("naudiodon2"); +const sherpa_onnx = require('sherpa-onnx'); +const portAudio = require('naudiodon2'); console.log(portAudio.getDevices()); function createOfflineRecognizer() { - let featConfig = new sherpa_onnx.FeatureConfig(); + const featConfig = new sherpa_onnx.FeatureConfig(); featConfig.sampleRate = 16000; featConfig.featureDim = 80; // test online recognizer - let whisper = new sherpa_onnx.OfflineWhisperModelConfig(); - whisper.encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"; - whisper.decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"; - let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"; + const whisper = new sherpa_onnx.OfflineWhisperModelConfig(); + whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; + whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; + const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; - let modelConfig = new sherpa_onnx.OfflineModelConfig(); + const modelConfig = new sherpa_onnx.OfflineModelConfig(); modelConfig.whisper = whisper; modelConfig.tokens = tokens; - modelConfig.modelType = "whisper"; + modelConfig.modelType = 'whisper'; - let recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); + const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig(); recognizerConfig.featConfig = featConfig; recognizerConfig.modelConfig = modelConfig; - recognizerConfig.decodingMethod = "greedy_search"; + recognizerConfig.decodingMethod = 'greedy_search'; - let recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); + const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig); return recognizer; } function createVad() { - let sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); - sileroVadModelConfig.model = "./silero_vad.onnx"; - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); + sileroVadModelConfig.model = './silero_vad.onnx'; + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds sileroVadModelConfig.windowSize = 512; - let vadModelConfig = new sherpa_onnx.VadModelConfig(); + const vadModelConfig = new sherpa_onnx.VadModelConfig(); vadModelConfig.sileroVad = sileroVadModelConfig; vadModelConfig.sampleRate = 16000; - let bufferSizeInSeconds = 60; - let vad = new sherpa_onnx.VoiceActivityDetector(vadModelConfig, - bufferSizeInSeconds); + const bufferSizeInSeconds = 60; + const vad = new sherpa_onnx.VoiceActivityDetector( + vadModelConfig, bufferSizeInSeconds); return vad; } -let recognizer = createOfflineRecognizer(); -let vad = createVad(); +const recognizer = createOfflineRecognizer(); +const vad = createVad(); -let bufferSizeInSeconds = 30; -let buffer = +const bufferSizeInSeconds = 30; +const buffer = new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); -let ai = new portAudio.AudioIO({ - inOptions : { - channelCount : 1, - closeOnError : 
true, // Close the stream if an audio error is detected, if +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if // set false then just log the error - deviceId : -1, // Use -1 or omit the deviceId to select the default device - sampleFormat : portAudio.SampleFormatFloat32, - sampleRate : vad.config.sampleRate + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: vad.config.sampleRate } }); let printed = false; let index = 0; -ai.on("data", data => { - let windowSize = vad.config.sileroVad.windowSize; +ai.on('data', data => { + const windowSize = vad.config.sileroVad.windowSize; buffer.push(new Float32Array(data.buffer)); while (buffer.size() > windowSize) { - let samples = buffer.get(buffer.head(), windowSize); + const samples = buffer.get(buffer.head(), windowSize); buffer.pop(windowSize); vad.acceptWaveform(samples) } while (!vad.isEmpty()) { - let segment = vad.front(); + const segment = vad.front(); vad.pop(); - let stream = recognizer.createStream(); - stream.acceptWaveform(recognizer.config.featConfig.sampleRate, - segment.samples); + const stream = recognizer.createStream(); + stream.acceptWaveform( + recognizer.config.featConfig.sampleRate, segment.samples); recognizer.decode(stream); - let r = recognizer.getResult(stream); + const r = recognizer.getResult(stream); stream.free(); if (r.text.length > 0) { console.log(`${index}: ${r.text}`); @@ -91,12 +91,12 @@ ai.on("data", data => { } }); -ai.on("close", () => { - console.log("Free resources"); +ai.on('close', () => { + console.log('Free resources'); recognizer.free(); vad.free(); buffer.free(); }); ai.start(); -console.log("Started! Please speak") +console.log('Started! 
Please speak') diff --git a/nodejs-examples/test-vad-microphone.js b/nodejs-examples/test-vad-microphone.js index cb0a369b4..ec65b50fc 100644 --- a/nodejs-examples/test-vad-microphone.js +++ b/nodejs-examples/test-vad-microphone.js @@ -1,48 +1,48 @@ // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -const sherpa_onnx = require("./index.js"); -const portAudio = require("naudiodon2"); +const sherpa_onnx = require('sherpa-onnx'); +const portAudio = require('naudiodon2'); console.log(portAudio.getDevices()); function createVad() { - let sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); - sileroVadModelConfig.model = "./silero_vad.onnx"; - sileroVadModelConfig.minSpeechDuration = 0.3; // seconds - sileroVadModelConfig.minSilenceDuration = 0.3; // seconds + const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig(); + sileroVadModelConfig.model = './silero_vad.onnx'; + sileroVadModelConfig.minSpeechDuration = 0.3; // seconds + sileroVadModelConfig.minSilenceDuration = 0.3; // seconds sileroVadModelConfig.windowSize = 512; - let vadModelConfig = new sherpa_onnx.VadModelConfig(); + const vadModelConfig = new sherpa_onnx.VadModelConfig(); vadModelConfig.sileroVad = sileroVadModelConfig; vadModelConfig.sampleRate = 16000; - let bufferSizeInSeconds = 60; - let vad = new sherpa_onnx.VoiceActivityDetector(vadModelConfig, - bufferSizeInSeconds); + const bufferSizeInSeconds = 60; + const vad = new sherpa_onnx.VoiceActivityDetector( + vadModelConfig, bufferSizeInSeconds); return vad; } vad = createVad(); -let bufferSizeInSeconds = 30; -let buffer = +const bufferSizeInSeconds = 30; +const buffer = new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate); -let ai = new portAudio.AudioIO({ - inOptions : { - channelCount : 1, - closeOnError : true, // Close the stream if an audio error is detected, if +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if // set false then just log the error - deviceId : -1, // Use -1 or omit the deviceId to select the default device - sampleFormat : portAudio.SampleFormatFloat32, - sampleRate : vad.config.sampleRate + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: vad.config.sampleRate } }); let printed = false; let index = 0; -ai.on("data", data => { - let windowSize = vad.config.sileroVad.windowSize; +ai.on('data', data => { + const windowSize = vad.config.sileroVad.windowSize; buffer.push(new Float32Array(data.buffer)); while (buffer.size() > windowSize) { - let samples = buffer.get(buffer.head(), windowSize); + const samples = buffer.get(buffer.head(), windowSize); buffer.pop(windowSize); vad.acceptWaveform(samples) if (vad.isDetected() && !printed) { @@ -55,20 +55,20 @@ ai.on("data", data => { } while (!vad.isEmpty()) { - let segment = vad.front(); + const segment = vad.front(); vad.pop(); - let duration = segment.samples.length / vad.config.sampleRate; + const duration = segment.samples.length / vad.config.sampleRate; console.log(`${index} End of speech. Duration: ${duration} seconds`); index += 1; } } }); -ai.on("close", () => { - console.log("Free resources"); +ai.on('close', () => { + console.log('Free resources'); vad.free(); buffer.free(); }); ai.start(); -console.log("Started! Please speak") +console.log('Started! 
Please speak') diff --git a/scripts/nodejs/README.md b/scripts/nodejs/README.md index 60d2590c2..ed520597e 100644 --- a/scripts/nodejs/README.md +++ b/scripts/nodejs/README.md @@ -1,6 +1,8 @@ # Introduction -Text-to-speech and speech-to-text with [Next-gen Kaldi](https://github.com/k2-fsa/) +Text-to-speech and speech-to-text with [Next-gen Kaldi](https://github.com/k2-fsa/). + +It processes everything locally without accessing the Internet. Please refer to https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples diff --git a/scripts/nodejs/package.json.in b/scripts/nodejs/package.json.in index 93190cb4a..b097edc90 100644 --- a/scripts/nodejs/package.json.in +++ b/scripts/nodejs/package.json.in @@ -1,5 +1,5 @@ { - "name": "sherpa-onnx3", + "name": "sherpa-onnx", "version": "SHERPA_ONNX_VERSION", "description": "Real-time speech recognition with Next-gen Kaldi", "main": "index.js", @@ -11,15 +11,28 @@ "url": "git+https://github.com/k2-fsa/sherpa-onnx.git" }, "keywords": [ - "speech-to-text", - "text-to-speech", + "speech to text", + "text to speech", + "transcription", "real-time speech recognition", "without internet connect", "embedded systems", "open source", "zipformer", "asr", - "speech" + "tts", + "stt", + "c++", + "onnxruntime", + "onnx", + "ai", + "next-gen kaldi", + "offline", + "privacy", + "open source", + "streaming speech recognition", + "speech", + "recognition" ], "author": "The next-gen Kaldi team", "license": "Apache-2.0", diff --git a/scripts/nodejs/run.sh b/scripts/nodejs/run.sh index dbf27470a..213a87ecf 100755 --- a/scripts/nodejs/run.sh +++ b/scripts/nodejs/run.sh @@ -59,8 +59,11 @@ function linux_x64() { wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - cp -v sherpa_onnx/lib/*.so $dst + cp -v sherpa_onnx/lib/*.so* $dst rm -v $dst/libcargs.so + rm -v $dst/libsherpa-onnx-portaudio.so + rm -v $dst/libsherpa-onnx-fst.so + rm -v $dst/libonnxruntime.so cd .. rm -rf t @@ -79,6 +82,7 @@ function osx_x64() { rm -v $dst/libonnxruntime.dylib rm -v $dst/libcargs.dylib rm -v $dst/libsherpa-onnx-fst.dylib + rm -v $dst/libsherpa-onnx-portaudio.dylib cd .. rm -rf t @@ -97,6 +101,7 @@ function osx_arm64() { rm -v $dst/libonnxruntime.dylib rm -v $dst/libcargs.dylib rm -v $dst/libsherpa-onnx-fst.dylib + rm -v $dst/libsherpa-onnx-portaudio.dylib cd .. rm -rf t