diff --git a/.github/scripts/audioset/AT/run.sh b/.github/scripts/audioset/AT/run.sh new file mode 100755 index 0000000000..87856b64da --- /dev/null +++ b/.github/scripts/audioset/AT/run.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash + +set -ex + +python3 -m pip install onnxoptimizer onnxsim + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +cd egs/audioset/AT + +function test_pretrained() { + repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12 + repo=$(basename $repo_url) + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + pushd $repo/exp + git lfs pull --include pretrained.pt + ln -s pretrained.pt epoch-99.pt + ls -lh + popd + + log "test pretrained.pt" + + python3 zipformer/pretrained.py \ + --checkpoint $repo/exp/pretrained.pt \ + --label-dict $repo/data/class_labels_indices.csv \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav \ + $repo/test_wavs/3.wav \ + $repo/test_wavs/4.wav + + log "test jit export" + ls -lh $repo/exp/ + python3 zipformer/export.py \ + --exp-dir $repo/exp \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model 0 \ + --jit 1 + ls -lh $repo/exp/ + + log "test jit models" + python3 zipformer/jit_pretrained.py \ + --nn-model-filename $repo/exp/jit_script.pt \ + --label-dict $repo/data/class_labels_indices.csv \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav \ + $repo/test_wavs/3.wav \ + $repo/test_wavs/4.wav + + log "test onnx export" + ls -lh $repo/exp/ + python3 zipformer/export-onnx.py \ + --exp-dir $repo/exp \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model 0 + + ls -lh $repo/exp/ + + pushd $repo/exp/ + mv model-epoch-99-avg-1.onnx model.onnx + mv model-epoch-99-avg-1.int8.onnx model.int8.onnx + popd + + ls -lh $repo/exp/ + + log "test onnx models" + for m in model.onnx model.int8.onnx; do # test every exported model, not only model.onnx + log "$m" + python3 zipformer/onnx_pretrained.py \ + --model-filename $repo/exp/$m \ 
--label-dict $repo/data/class_labels_indices.csv \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav \ + $repo/test_wavs/3.wav \ + $repo/test_wavs/4.wav + done + + log "prepare data for uploading to huggingface" + dst=/icefall/model-onnx + mkdir -p $dst + cp -v $repo/exp/*.onnx $dst/ + cp -v $repo/data/* $dst/ + cp -av $repo/test_wavs $dst + + ls -lh $dst + ls -lh $dst/test_wavs +} + +test_pretrained diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index f64446e7e4..15f49f8264 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -49,6 +49,8 @@ RUN pip install --no-cache-dir \ multi_quantization \ numba \ numpy \ + onnxoptimizer \ + onnxsim \ onnx \ onnxmltools \ onnxruntime \ diff --git a/.github/workflows/audioset.yml b/.github/workflows/audioset.yml new file mode 100644 index 0000000000..280ef8f8e4 --- /dev/null +++ b/.github/workflows/audioset.yml @@ -0,0 +1,137 @@ +name: audioset + +on: + push: + branches: + - master + + pull_request: + branches: + - master + + workflow_dispatch: + +concurrency: + group: audioset-${{ github.ref }} + cancel-in-progress: true + +jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT" + + audioset: + needs: generate_build_matrix + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + ${{ 
fromJson(needs.generate_build_matrix.outputs.matrix) }} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Free space + shell: bash + run: | + ls -lh + df -h + rm -rf /opt/hostedtoolcache + df -h + echo "pwd: $PWD" + echo "github.workspace ${{ github.workspace }}" + + - name: Run tests + uses: addnab/docker-run-action@v3 + with: + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall + + .github/scripts/audioset/AT/run.sh + + - name: Show model files + shell: bash + run: | + sudo chown -R runner ./model-onnx + ls -lh ./model-onnx + chmod -x ./model-onnx/class_labels_indices.csv + + echo "----------" + ls -lh ./model-onnx/* + + - name: Upload model to huggingface + if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + cp ../model-onnx/*.onnx ./ + cp ../model-onnx/*.csv ./ + cp -a ../model-onnx/test_wavs ./ + ls -lh + git add . 
+ git status + git commit -m "update models" + git status + + git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true + rm -rf huggingface + + - name: Prepare for release + if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push' + shell: bash + run: | + d=sherpa-onnx-zipformer-audio-tagging-2024-04-09 + mv ./model-onnx $d + tar cjvf ${d}.tar.bz2 $d + ls -lh + + - name: Release exported onnx models + if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push' + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: sherpa-onnx-*.tar.bz2 + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: audio-tagging-models + diff --git a/docker/torch1.12.1-cuda11.3.dockerfile b/docker/torch1.12.1-cuda11.3.dockerfile index 33ecbf4d19..9815a8ec7c 100644 --- a/docker/torch1.12.1-cuda11.3.dockerfile +++ b/docker/torch1.12.1-cuda11.3.dockerfile @@ -55,6 +55,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch1.13.0-cuda11.6.dockerfile b/docker/torch1.13.0-cuda11.6.dockerfile index b4d62b0bc2..d13d2a7cb0 100644 --- a/docker/torch1.13.0-cuda11.6.dockerfile +++ b/docker/torch1.13.0-cuda11.6.dockerfile @@ -55,6 +55,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch1.9.0-cuda10.2.dockerfile b/docker/torch1.9.0-cuda10.2.dockerfile index 4d2d3058a3..5936fe06a9 100644 --- a/docker/torch1.9.0-cuda10.2.dockerfile +++ b/docker/torch1.9.0-cuda10.2.dockerfile @@ -69,6 +69,8 @@ RUN pip uninstall -y tqdm && \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git 
a/docker/torch2.0.0-cuda11.7.dockerfile b/docker/torch2.0.0-cuda11.7.dockerfile index 31ff09ac69..e2e27b55de 100644 --- a/docker/torch2.0.0-cuda11.7.dockerfile +++ b/docker/torch2.0.0-cuda11.7.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.1.0-cuda11.8.dockerfile b/docker/torch2.1.0-cuda11.8.dockerfile index 83b64a8d24..de1e07e694 100644 --- a/docker/torch2.1.0-cuda11.8.dockerfile +++ b/docker/torch2.1.0-cuda11.8.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.1.0-cuda12.1.dockerfile b/docker/torch2.1.0-cuda12.1.dockerfile index ec366a8981..89303797a5 100644 --- a/docker/torch2.1.0-cuda12.1.dockerfile +++ b/docker/torch2.1.0-cuda12.1.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.2.0-cuda11.8.dockerfile b/docker/torch2.2.0-cuda11.8.dockerfile index 143f0e0661..3364477a8d 100644 --- a/docker/torch2.2.0-cuda11.8.dockerfile +++ b/docker/torch2.2.0-cuda11.8.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.2.0-cuda12.1.dockerfile b/docker/torch2.2.0-cuda12.1.dockerfile index c6d5a771f6..3cc41902d0 100644 --- a/docker/torch2.2.0-cuda12.1.dockerfile +++ b/docker/torch2.2.0-cuda12.1.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.2.1-cuda11.8.dockerfile b/docker/torch2.2.1-cuda11.8.dockerfile index d874134d77..76b7856228 100644 --- 
a/docker/torch2.2.1-cuda11.8.dockerfile +++ b/docker/torch2.2.1-cuda11.8.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.2.1-cuda12.1.dockerfile b/docker/torch2.2.1-cuda12.1.dockerfile index 6e4ef290a3..55bdfa4d7a 100644 --- a/docker/torch2.2.1-cuda12.1.dockerfile +++ b/docker/torch2.2.1-cuda12.1.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.2.2-cuda11.8.dockerfile b/docker/torch2.2.2-cuda11.8.dockerfile index bca40a0658..02de82c504 100644 --- a/docker/torch2.2.2-cuda11.8.dockerfile +++ b/docker/torch2.2.2-cuda11.8.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/docker/torch2.2.2-cuda12.1.dockerfile b/docker/torch2.2.2-cuda12.1.dockerfile index 4fb8946e75..44ad38b8e7 100644 --- a/docker/torch2.2.2-cuda12.1.dockerfile +++ b/docker/torch2.2.2-cuda12.1.dockerfile @@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \ onnx \ onnxruntime \ onnxmltools \ + onnxoptimizer \ + onnxsim \ multi_quantization \ typeguard \ numpy \ diff --git a/egs/audioset/AT/zipformer/export-onnx.py b/egs/audioset/AT/zipformer/export-onnx.py index af83c0e9c6..9476dac628 100755 --- a/egs/audioset/AT/zipformer/export-onnx.py +++ b/egs/audioset/AT/zipformer/export-onnx.py @@ -6,56 +6,28 @@ """ This script exports a transducer model from PyTorch to ONNX. -We use the pre-trained model from -https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 -as an example to show how to use this file. - -1. 
Download the pre-trained model - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/ -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -pushd $repo -git lfs pull --include "exp/pretrained.pt" - -cd exp -ln -s pretrained.pt epoch-99.pt -popd - -2. Export the model to ONNX - -./zipformer/export-onnx.py \ - --use-averaged-model 0 \ - --epoch 99 \ - --avg 1 \ - --exp-dir $repo/exp \ - --num-encoder-layers "2,2,3,4,3,2" \ - --downsampling-factor "1,2,4,8,4,2" \ - --feedforward-dim "512,768,1024,1536,1024,768" \ - --num-heads "4,4,4,8,4,4" \ - --encoder-dim "192,256,384,512,384,256" \ - --query-head-dim 32 \ - --value-head-dim 12 \ - --pos-head-dim 4 \ - --pos-dim 48 \ - --encoder-unmasked-dim "192,192,256,256,256,192" \ - --cnn-module-kernel "31,31,15,15,15,31" \ - --decoder-dim 512 \ - --joiner-dim 512 \ - --causal False \ - --chunk-size "16,32,64,-1" \ - --left-context-frames "64,128,256,-1" - -It will generate the following 3 files inside $repo/exp: - - - encoder-epoch-99-avg-1.onnx - - decoder-epoch-99-avg-1.onnx - - joiner-epoch-99-avg-1.onnx - -See ./onnx_pretrained.py and ./onnx_check.py for how to +Usage of this script: + + repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12 + repo=$(basename $repo_url) + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + pushd $repo/exp + git lfs pull --include pretrained.pt + ln -s pretrained.pt epoch-99.pt + popd + + python3 zipformer/export-onnx.py \ + --exp-dir $repo/exp \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model 0 + + pushd $repo/exp + mv model-epoch-99-avg-1.onnx model.onnx + mv model-epoch-99-avg-1.int8.onnx model.int8.onnx + popd + +See ./onnx_pretrained.py for how to use the exported ONNX models. 
""" @@ -66,9 +38,11 @@ import k2 import onnx +import onnxoptimizer import torch import torch.nn as nn from onnxruntime.quantization import QuantType, quantize_dynamic +from onnxsim import simplify from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_model, get_params from zipformer import Zipformer2 @@ -261,6 +235,29 @@ def export_audio_tagging_model_onnx( add_meta_data(filename=filename, meta_data=meta_data) +def optimize_model(filename): + # see + # https://github.com/microsoft/onnxruntime/issues/1899#issuecomment-534806537 + # and + # https://github.com/onnx/onnx/issues/582#issuecomment-937788108 + # and + # https://github.com/onnx/optimizer/issues/110 + # and + # https://qiita.com/Yossy_Hal/items/34f3b2aef2199baf7f5f + passes = ["eliminate_unused_initializer"] + onnx_model = onnx.load(filename) + onnx_model = onnxoptimizer.optimize(onnx_model, passes) + + model_simp, check = simplify(onnx_model) + if check: + logging.info("Simplified the model!") + onnx_model = model_simp + else: + logging.info("Failed to simplify the model!") + + onnx.save(onnx_model, filename) + + @torch.no_grad() def main(): args = get_parser().parse_args() @@ -389,6 +386,7 @@ def main(): model_filename, opset_version=opset_version, ) + optimize_model(model_filename) logging.info(f"Exported audio tagging model to {model_filename}") # Generate int8 quantization models @@ -403,6 +401,7 @@ def main(): op_types_to_quantize=["MatMul"], weight_type=QuantType.QInt8, ) + optimize_model(model_filename_int8) if __name__ == "__main__": diff --git a/egs/audioset/AT/zipformer/export.py b/egs/audioset/AT/zipformer/export.py index bdcf8b7dd9..6ceeca8dee 100755 --- a/egs/audioset/AT/zipformer/export.py +++ b/egs/audioset/AT/zipformer/export.py @@ -25,7 +25,7 @@ Usage: -Note: This is a example for librispeech dataset, if you are using different +Note: This is an example for AudioSet dataset, if you are using different dataset, you should change the argument 
values according to your dataset. (1) Export to torchscript model using torch.jit.script() @@ -42,6 +42,7 @@ Check ./jit_pretrained.py for its usage. Check https://github.com/k2-fsa/sherpa +and https://github.com/k2-fsa/sherpa-onnx for how to use the exported models outside of icefall. (2) Export `model.state_dict()` @@ -55,13 +56,13 @@ It will generate a file `pretrained.pt` in the given `exp_dir`. You can later load it by `icefall.checkpoint.load_checkpoint()`. -To use the generated file with `zipformer/decode.py`, +To use the generated file with `zipformer/evaluate.py`, you can do: cd /path/to/exp_dir ln -s pretrained.pt epoch-9999.pt - cd /path/to/egs/librispeech/ASR + cd /path/to/egs/audioset/AT ./zipformer/evaluate.py \ --exp-dir ./zipformer/exp \ --use-averaged-model False \ diff --git a/egs/audioset/AT/zipformer/jit_pretrained.py b/egs/audioset/AT/zipformer/jit_pretrained.py index 8e3afcb6fc..403308fcfb 100755 --- a/egs/audioset/AT/zipformer/jit_pretrained.py +++ b/egs/audioset/AT/zipformer/jit_pretrained.py @@ -28,10 +28,20 @@ Usage of this script: -./zipformer/jit_pretrained.py \ - --nn-model-filename ./zipformer/exp/cpu_jit.pt \ - /path/to/foo.wav \ - /path/to/bar.wav + repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12 + repo=$(basename $repo_url) + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + pushd $repo/exp + git lfs pull --include jit_script.pt + popd + + python3 zipformer/jit_pretrained.py \ + --nn-model-filename $repo/exp/jit_script.pt \ + --label-dict $repo/data/class_labels_indices.csv \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav \ + $repo/test_wavs/3.wav \ + $repo/test_wavs/4.wav """ import argparse @@ -168,7 +178,8 @@ def main(): topk_prob, topk_index = logit.sigmoid().topk(5) topk_labels = [label_dict[index.item()] for index in topk_index] logging.info( - f"{filename}: Top 5 predicted labels are {topk_labels} with probability of {topk_prob.tolist()}" + f"{filename}: Top 5 predicted labels are 
{topk_labels} with " + f"probability of {topk_prob.tolist()}" ) logging.info("Done") diff --git a/egs/audioset/AT/zipformer/onnx_pretrained.py b/egs/audioset/AT/zipformer/onnx_pretrained.py index c7753715ac..1d3093d999 100755 --- a/egs/audioset/AT/zipformer/onnx_pretrained.py +++ b/egs/audioset/AT/zipformer/onnx_pretrained.py @@ -17,48 +17,25 @@ # limitations under the License. """ This script loads ONNX models and uses them to decode waves. -You can use the following command to get the exported models: -We use the pre-trained model from -https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/ -as an example to show how to use this file. - -1. Download the pre-trained model - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/ -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -pushd $repo -git lfs pull --include "exp/pretrained.pt" - -cd exp -ln -s pretrained.pt epoch-99.pt -popd - -2. Export the model to ONNX - -./zipformer/export-onnx.py \ - --use-averaged-model 0 \ - --epoch 99 \ - --avg 1 \ - --exp-dir $repo/exp \ - --causal False - -It will generate the following 3 files inside $repo/exp: - - - model-epoch-99-avg-1.onnx - -3. 
Run this file - -./zipformer/onnx_pretrained.py \ - --model-filename $repo/exp/model-epoch-99-avg-1.onnx \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav +Usage of this script: + + repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12 + repo=$(basename $repo_url) + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + pushd $repo/exp + git lfs pull --include "*.onnx" + popd + + for m in model.onnx model.int8.onnx; do + python3 zipformer/onnx_pretrained.py \ + --model-filename $repo/exp/$m \ + --label-dict $repo/data/class_labels_indices.csv \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav \ + $repo/test_wavs/3.wav \ + $repo/test_wavs/4.wav + done """ import argparse diff --git a/egs/audioset/AT/zipformer/pretrained.py b/egs/audioset/AT/zipformer/pretrained.py index 60e4d05182..bdbd799fad 100755 --- a/egs/audioset/AT/zipformer/pretrained.py +++ b/egs/audioset/AT/zipformer/pretrained.py @@ -18,27 +18,25 @@ This script loads a checkpoint and uses it to decode waves. You can generate the checkpoint with the following command: -Note: This is a example for librispeech dataset, if you are using different +Note: This is an example for the AudioSet dataset, if you are using different dataset, you should change the argument values according to your dataset. - -./zipformer/export.py \ - --exp-dir ./zipformer/exp \ - --tokens data/lang_bpe_500/tokens.txt \ - --epoch 30 \ - --avg 9 - Usage of this script: -./zipformer/pretrained.py \ - --checkpoint ./zipformer/exp/pretrained.pt \ - /path/to/foo.wav \ - /path/to/bar.wav - - -You can also use `./zipformer/exp/epoch-xx.pt`. 
- -Note: ./zipformer/exp/pretrained.pt is generated by ./zipformer/export.py + repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12 + repo=$(basename $repo_url) + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + pushd $repo/exp + git lfs pull --include pretrained.pt + popd + + python3 zipformer/pretrained.py \ + --checkpoint $repo/exp/pretrained.pt \ + --label-dict $repo/data/class_labels_indices.csv \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav \ + $repo/test_wavs/3.wav \ + $repo/test_wavs/4.wav """ @@ -189,7 +187,8 @@ def main(): topk_prob, topk_index = logit.sigmoid().topk(5) topk_labels = [label_dict[index.item()] for index in topk_index] logging.info( - f"{filename}: Top 5 predicted labels are {topk_labels} with probability of {topk_prob.tolist()}" + f"{filename}: Top 5 predicted labels are {topk_labels} with " + f"probability of {topk_prob.tolist()}" ) logging.info("Done") @@ -199,4 +198,5 @@ def main(): formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" logging.basicConfig(format=formatter, level=logging.INFO) + main() diff --git a/requirements.txt b/requirements.txt index 6bafa6aca3..8410453f95 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,13 +8,15 @@ pypinyin==0.50.0 tensorboard typeguard dill -onnx==1.15.0 -onnxruntime==1.16.3 +onnx>=1.15.0 +onnxruntime>=1.16.3 +onnxoptimizer +onnxsim # style check session: black==22.3.0 isort==5.10.1 -flake8==5.0.4 +flake8==5.0.4 # cantonese word segment support -pycantonese==3.4.0 \ No newline at end of file +pycantonese==3.4.0