From 1f5c0a87b9b89fb25d5ea853d07335119767ebed Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Sun, 16 Jun 2024 19:15:09 +0800
Subject: [PATCH] Add CI for ksponspeech (#1655)

---
 .github/scripts/ksponspeech/ASR/run.sh |  72 +++++++++++++++
 .github/workflows/ksponspeech.yml      | 118 +++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
 create mode 100755 .github/scripts/ksponspeech/ASR/run.sh
 create mode 100644 .github/workflows/ksponspeech.yml

diff --git a/.github/scripts/ksponspeech/ASR/run.sh b/.github/scripts/ksponspeech/ASR/run.sh
new file mode 100755
index 0000000000..068c22dfcb
--- /dev/null
+++ b/.github/scripts/ksponspeech/ASR/run.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# CI smoke test for the pretrained ksponspeech streaming transducer model.
+set -ex  # exit on the first failing command; trace every command
+
+log() {
+  # Print "$*" tagged with a timestamp and the caller's file:line:function (borrowed from espnet).
+  local caller_file="${BASH_SOURCE[1]##*/}" caller_line="${BASH_LINENO[0]}" caller_func="${FUNCNAME[1]}"
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
+}
+
+cd egs/ksponspeech/ASR
+# The relative ./pruned_transducer_stateless7_streaming/*.py paths below resolve from here.
+
+function test_pretrained() {
+  git lfs install
+  git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
+  repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
+  pushd $repo
+  mkdir test_wavs
+  cd test_wavs
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
+  cd ../exp
+  ln -s pretrained.pt epoch-99.pt
+  ls -lh
+  popd
+
+  log 'test pretrained.py'
+  ./pruned_transducer_stateless7_streaming/pretrained.py \
+      --checkpoint $repo/exp/pretrained.pt \
+      --tokens $repo/data/lang_bpe_5000/tokens.txt \
+      --method greedy_search \
+      $repo/test_wavs/0.wav \
+      $repo/test_wavs/1.wav \
+      $repo/test_wavs/2.wav \
+      $repo/test_wavs/3.wav
+
+  log 'test export-onnx.py'
+
+  ./pruned_transducer_stateless7_streaming/export-onnx.py \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    --use-averaged-model 0 \
+    --epoch 99 \
+    --avg 1 \
+    --decode-chunk-len 32 \
+    --exp-dir $repo/exp/
+
+  ls -lh $repo/exp
+
+  ls -lh $repo/data/lang_bpe_5000/
+
+  log 'test exported onnx models'
+  ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    $repo/test_wavs/0.wav
+
+  dst=/tmp/model1
+  mkdir -p $dst
+
+  cp -v $repo/exp/*.onnx $dst
+  cp -v $repo/exp/*.onnx $dst
+  cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
+  cp -v $repo/data/lang_bpe_5000/bpe.model $dst
+  rm -rf $repo
+}
+# Entry point: run the single test defined above.
+test_pretrained
diff --git a/.github/workflows/ksponspeech.yml b/.github/workflows/ksponspeech.yml
new file mode 100644
index 0000000000..2e1441c066
--- /dev/null
+++ b/.github/workflows/ksponspeech.yml
@@ -0,0 +1,118 @@
+name: ksponspeech
+
+on:
+  push:
+    branches:
+      - ksponspeech  # pushes to this branch only
+
+  workflow_dispatch:  # also allow manual runs
+
+jobs:
+  ksponspeech:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.8"]  # quoted so YAML keeps it a string (an unquoted 3.10 would parse as 3.1)
+      fail-fast: false
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # 0 = fetch the full history instead of a shallow clone
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'  # reuse pip's download cache between runs
+          cache-dependency-path: '**/requirements-ci.txt'  # files the pip cache key is derived from
+
+      - name: Install Python dependencies  # pip-install the non-comment lines of requirements-ci.txt, then reinstall protobuf 3.20.* from source
+        run: |
+          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
+          pip uninstall -y protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
+
+      - name: Cache kaldifeat  # on a hit, the 'Install kaldifeat' step is skipped
+        id: my-cache
+        uses: actions/cache@v4  # v1/v2 are retired and fail on GitHub-hosted runners
+        with:
+          path: |
+            ~/tmp/kaldifeat
+          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
+
+      - name: Install kaldifeat
+        if: steps.my-cache.outputs.cache-hit != 'true'  # only when the my-cache step did not restore ~/tmp/kaldifeat
+        shell: bash
+        run: |
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Test  # run the CI script with the repo root and the kaldifeat build on PYTHONPATH
+        shell: bash
+        run: |
+          export PYTHONPATH=$PWD:$PYTHONPATH
+          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
+          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
+
+          .github/scripts/ksponspeech/ASR/run.sh
+
+      - name: Show model files  # list what run.sh staged in /tmp/model1
+        shell: bash
+        run: |
+          src=/tmp/model1
+          ls -lh $src
+      # Commits the files staged in /tmp/model1 to the HF repo and copies the repo's test_wavs back into /tmp/model1.
+      - name: Upload model to huggingface  # the final push is best-effort (|| true)
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200  # per-attempt time limit
+          shell: bash
+          command: |
+            src=/tmp/model1
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf hf
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+
+            git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
+            cd hf
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+            cp -v $src/* ./
+            ls -lh
+            git lfs track "bpe.model"
+            git lfs track "*.onnx"
+            cp -av test_wavs $src/
+            git add .
+            git status
+            git commit -m "update models"
+            git status
+
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
+            rm -rf hf
+
+      - name: Prepare for release  # move /tmp/model1 to ./<model name> and pack it as <model name>.tar.bz2
+        shell: bash
+        run: |
+          src=/tmp/model1
+          d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
+          mv $src ./$d
+          tar cjvf ${d}.tar.bz2 $d
+          ls -lh
+
+      - name: Release exported onnx models  # upload the tarball to the asr-models tag of k2-fsa/sherpa-onnx
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true  # treat `file` as a glob pattern
+          overwrite: true  # replace an existing asset with the same name
+          file: sherpa-onnx-*.tar.bz2
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: asr-models