diff --git a/.github/workflows/export-whisper-to-onnx.yaml b/.github/workflows/export-whisper-to-onnx.yaml index 603e82312..a50aa99d7 100644 --- a/.github/workflows/export-whisper-to-onnx.yaml +++ b/.github/workflows/export-whisper-to-onnx.yaml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [macos-latest] - model: ["distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"] + model: ["turbo", "distil-medium.en", "distil-small.en", "tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "medium-aishell", "large", "large-v1", "large-v2", "large-v3", "distil-large-v2"] # model: ["large", "large-v1", "large-v2", "large-v3", "distil-large-v2"] python-version: ["3.8"] @@ -32,7 +32,8 @@ jobs: shell: bash run: | python3 -m pip install torch==1.13.0 torchaudio==0.13.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html - python3 -m pip install openai-whisper==20231117 onnxruntime onnx soundfile librosa + python3 -m pip install -U openai-whisper + python3 -m pip install onnxruntime onnx soundfile librosa - name: export ${{ matrix.model }} shell: bash diff --git a/scripts/whisper/export-onnx.py b/scripts/whisper/export-onnx.py index cf9c6c892..27f10c941 100755 --- a/scripts/whisper/export-onnx.py +++ b/scripts/whisper/export-onnx.py @@ -46,7 +46,8 @@ def get_args(): choices=[ "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", - "large", "large-v1", "large-v2", "large-v3", + "large-v1", "large-v2", + "large", "large-v3", "turbo", # these three have feature dim 128 "distil-medium.en", "distil-small.en", "distil-large-v2", # "distil-large-v3", # distil-large-v3 is not supported! # for fine-tuned models from icefall @@ -76,7 +77,7 @@ def add_meta_data(filename: str, meta_data: Dict[str, Any]): meta.key = key meta.value = str(value) - if "large" in filename: + if "large" in filename or "turbo" in filename: external_filename = filename.split(".onnx")[0] onnx.save( model, @@ -404,7 +405,7 @@ def main(): audio = whisper.pad_or_trim(audio) assert audio.shape == (16000 * 30,), audio.shape - if args.model in ("large", "large-v3"): + if args.model in ("large", "large-v3", "turbo"): n_mels = 128 else: n_mels = 80