From 7931f6520db649049343d165d2734dddb49e406b Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Thu, 18 Jul 2024 21:15:14 +0800
Subject: [PATCH] swift api for sense voice

---
 .gitignore                          |  1 +
 swift-api-examples/SherpaOnnx.swift | 18 ++++++++++++++++--
 .../decode-file-non-streaming.swift | 19 ++++++++++++++++++-
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index a39684cee..5486ad51a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -111,3 +111,4 @@ sherpa-onnx-telespeech-ctc-*
 *.fst
 .ccache
 lib*.a
+sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift
index eba8d8916..c6f8b51eb 100644
--- a/swift-api-examples/SherpaOnnx.swift
+++ b/swift-api-examples/SherpaOnnx.swift
@@ -355,6 +355,18 @@ func sherpaOnnxOfflineTdnnModelConfig(
   )
 }
 
+func sherpaOnnxOfflineSenseVoiceModelConfig(
+  model: String = "",
+  language: String = "",
+  useInverseTextNormalization: Bool = false
+) -> SherpaOnnxOfflineSenseVoiceModelConfig {
+  return SherpaOnnxOfflineSenseVoiceModelConfig(
+    model: toCPointer(model),
+    language: toCPointer(language),
+    use_itn: useInverseTextNormalization ? 1 : 0
+  )
+}
+
 func sherpaOnnxOfflineLMConfig(
   model: String = "",
   scale: Float = 1.0
@@ -378,7 +390,8 @@ func sherpaOnnxOfflineModelConfig(
   modelType: String = "",
   modelingUnit: String = "cjkchar",
   bpeVocab: String = "",
-  teleSpeechCtc: String = ""
+  teleSpeechCtc: String = "",
+  senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig()
 ) -> SherpaOnnxOfflineModelConfig {
   return SherpaOnnxOfflineModelConfig(
     transducer: transducer,
@@ -393,7 +406,8 @@
     model_type: toCPointer(modelType),
     modeling_unit: toCPointer(modelingUnit),
     bpe_vocab: toCPointer(bpeVocab),
-    telespeech_ctc: toCPointer(teleSpeechCtc)
+    telespeech_ctc: toCPointer(teleSpeechCtc),
+    sense_voice: senseVoice
   )
 }
 
diff --git a/swift-api-examples/decode-file-non-streaming.swift b/swift-api-examples/decode-file-non-streaming.swift
index ca9d9475e..a60777832 100644
--- a/swift-api-examples/decode-file-non-streaming.swift
+++ b/swift-api-examples/decode-file-non-streaming.swift
@@ -17,6 +17,7 @@ func run() {
   var modelConfig: SherpaOnnxOfflineModelConfig
   var modelType = "whisper"
   // modelType = "paraformer"
+  // modelType = "sense_voice"
 
   if modelType == "whisper" {
     let encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"
@@ -47,6 +48,19 @@
       debug: 0,
       modelType: "paraformer"
     )
+  } else if modelType == "sense_voice" {
+    let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
+    let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+    let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
+      model: model,
+      useInverseTextNormalization: true
+    )
+
+    modelConfig = sherpaOnnxOfflineModelConfig(
+      tokens: tokens,
+      debug: 0,
+      senseVoice: senseVoiceConfig
+    )
   } else {
     print("Please specify a supported modelType \(modelType)")
     return
@@ -63,7 +77,10 @@ func run() {
 
   recognizer = SherpaOnnxOfflineRecognizer(config: &config)
 
-  let filePath = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"
+  var filePath = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"
+  if modelType == "sense_voice" {
+    filePath = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav"
+  }
   let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
 
   let audioFile = try! AVAudioFile(forReading: fileURL as URL)
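
For reference, a minimal sketch of how the two helpers touched by this patch fit together, mirroring the sense_voice branch added to decode-file-non-streaming.swift; the model paths are the release directory this patch adds to .gitignore, and everything after building modelConfig follows the example's existing whisper/paraformer flow rather than any new API.

// Illustrative sketch only, not part of the diff: wiring the new SenseVoice config
// into the offline model config, using only the helpers shown in this patch.
let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
  model: "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx",
  // language is left at its default ("")
  useInverseTextNormalization: true  // maps to the C field use_itn (1 when true)
)

let modelConfig = sherpaOnnxOfflineModelConfig(
  tokens: "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt",
  debug: 0,
  senseVoice: senseVoiceConfig  // new parameter introduced by this patch
)
// From here the existing flow in decode-file-non-streaming.swift is unchanged:
// wrap modelConfig into the recognizer config and construct SherpaOnnxOfflineRecognizer.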