From 1f5c0a87b9b89fb25d5ea853d07335119767ebed Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Sun, 16 Jun 2024 19:15:09 +0800
Subject: [PATCH] Add CI for ksponspeech (#1655)

---
 .github/scripts/ksponspeech/ASR/run.sh |  79 ++++++++++++++++
 .github/workflows/ksponspeech.yml      | 123 ++++++++++++++++++++++++++
 2 files changed, 202 insertions(+)
 create mode 100755 .github/scripts/ksponspeech/ASR/run.sh
 create mode 100644 .github/workflows/ksponspeech.yml

diff --git a/.github/scripts/ksponspeech/ASR/run.sh b/.github/scripts/ksponspeech/ASR/run.sh
new file mode 100755
index 0000000000..068c22dfcb
--- /dev/null
+++ b/.github/scripts/ksponspeech/ASR/run.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# CI smoke test for the ksponspeech streaming recipe: clones a pretrained
+# model, runs pretrained.py, export-onnx.py and onnx_pretrained.py on it, and
+# copies the exported files to /tmp/model1 for the calling workflow.
+
+set -ex
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+cd egs/ksponspeech/ASR
+
+
+# Download the pretrained model and test waves, exercise the checkpoint and
+# ONNX inference scripts, then stage the ONNX models and token files in /tmp/model1.
+function test_pretrained() {
+  git lfs install
+  git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
+  repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
+  pushd $repo
+  mkdir test_wavs
+  cd test_wavs
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
+  cd ../exp
+  # Presumably lets "--epoch 99 --avg 1" below pick up this checkpoint.
+  ln -s pretrained.pt epoch-99.pt
+  ls -lh
+  popd
+
+  log 'test pretrained.py'
+  ./pruned_transducer_stateless7_streaming/pretrained.py \
+    --checkpoint $repo/exp/pretrained.pt \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    --method greedy_search \
+    $repo/test_wavs/0.wav \
+    $repo/test_wavs/1.wav \
+    $repo/test_wavs/2.wav \
+    $repo/test_wavs/3.wav
+
+  log 'test export-onnx.py'
+
+  ./pruned_transducer_stateless7_streaming/export-onnx.py \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    --use-averaged-model 0 \
+    --epoch 99 \
+    --avg 1 \
+    --decode-chunk-len 32 \
+    --exp-dir $repo/exp/
+
+  ls -lh $repo/exp
+
+  ls -lh $repo/data/lang_bpe_5000/
+
+  log 'test exported onnx models'
+  ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    $repo/test_wavs/0.wav
+
+  # Files collected here are uploaded and released by the workflow.
+  dst=/tmp/model1
+  mkdir -p $dst
+
+  cp -v $repo/exp/*.onnx $dst
+  cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
+  cp -v $repo/data/lang_bpe_5000/bpe.model $dst
+  rm -rf $repo
+}
+
+test_pretrained
diff --git a/.github/workflows/ksponspeech.yml b/.github/workflows/ksponspeech.yml
new file mode 100644
index 0000000000..2e1441c066
--- /dev/null
+++ b/.github/workflows/ksponspeech.yml
@@ -0,0 +1,123 @@
+name: ksponspeech
+
+# Runs on pushes to the ksponspeech branch or when triggered manually.
+on:
+  push:
+    branches:
+      - ksponspeech
+
+  workflow_dispatch:
+
+jobs:
+  ksponspeech:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: ["3.8"]
+      fail-fast: false
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
+          cache-dependency-path: '**/requirements-ci.txt'
+
+      - name: Install Python dependencies
+        run: |
+          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
+          pip uninstall -y protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
+
+      # actions/cache v1-v2 are deprecated by GitHub; use a supported release.
+      - name: Cache kaldifeat
+        id: my-cache
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/tmp/kaldifeat
+          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
+
+      - name: Install kaldifeat
+        if: steps.my-cache.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Test
+        shell: bash
+        run: |
+          export PYTHONPATH=$PWD:$PYTHONPATH
+          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
+          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
+
+          .github/scripts/ksponspeech/ASR/run.sh
+
+      # run.sh leaves the exported models in /tmp/model1.
+      - name: Show model files
+        shell: bash
+        run: |
+          src=/tmp/model1
+          ls -lh $src
+
+      - name: Upload model to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            src=/tmp/model1
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf hf
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+
+            git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
+            cd hf
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+            cp -v $src/* ./
+            ls -lh
+            git lfs track "bpe.model"
+            git lfs track "*.onnx"
+            # Copy the cloned repo's test_wavs into $src, which the release step archives.
+            cp -av test_wavs $src/
+            git add .
+            git status
+            git commit -m "update models"
+            git status
+
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
+            rm -rf hf
+
+      - name: Prepare for release
+        shell: bash
+        run: |
+          src=/tmp/model1
+          d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
+          mv $src ./$d
+          tar cjvf ${d}.tar.bz2 $d
+          ls -lh
+
+      - name: Release exported onnx models
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: sherpa-onnx-*.tar.bz2
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: asr-models