From eec0c16128570239db2896a077f4fd7423907144 Mon Sep 17 00:00:00 2001
From: jhj0517 <97279763+jhj0517@users.noreply.github.com>
Date: Tue, 29 Oct 2024 01:21:30 +0900
Subject: [PATCH] Fix VAD syntax & add vad handling case

---
 modules/whisper/base_transcription_pipeline.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/modules/whisper/base_transcription_pipeline.py b/modules/whisper/base_transcription_pipeline.py
index b9a3ae0..808a47b 100644
--- a/modules/whisper/base_transcription_pipeline.py
+++ b/modules/whisper/base_transcription_pipeline.py
@@ -135,12 +135,17 @@ def run(self,
                 speech_pad_ms=vad_params.speech_pad_ms
             )
 
-            audio, speech_chunks = self.vad.run(
+            vad_processed, speech_chunks = self.vad.run(
                 audio=audio,
                 vad_parameters=vad_options,
                 progress=progress
             )
 
+            if vad_processed.size > 0:
+                audio = vad_processed
+            else:
+                vad_params.vad_filter = False
+
         result, elapsed_time = self.transcribe(
             audio,
             progress,
@@ -150,7 +155,7 @@ def run(self,
         if vad_params.vad_filter:
             result = self.vad.restore_speech_timestamps(
                 segments=result,
-                speech_chunks=vad_params.speech_chunks,
+                speech_chunks=speech_chunks,
             )
 
         if diarization_params.is_diarize: