Adding word level timestamps for Huggingface (transformers) whisper (#971)
chidiwilliams · Nov 1, 2024 · 386c151 · 386c151
1 parent 8a1a967
commit 386c151
Show file tree

Hide file tree

Showing 5 changed files with 146 additions and 153 deletions.
diff --git a/buzz/transcriber/whisper_file_transcriber.py b/buzz/transcriber/whisper_file_transcriber.py
@@ -124,6 +124,7 @@ def transcribe_hugging_face(cls, task: FileTranscriptionTask) -> List[Segment]:
             audio=task.file_path,
             language=language,
             task=task.transcription_options.task.value,
+            word_timestamps=task.transcription_options.word_level_timings,
         )
         return [
             Segment(

diff --git a/buzz/transformers_whisper.py b/buzz/transformers_whisper.py
@@ -160,6 +160,7 @@ def transcribe(
         audio: Union[str, np.ndarray],
         language: str,
         task: str,
+        word_timestamps: bool = False,
     ):
         device = "cuda" if torch.cuda.is_available() else "cpu"
         torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -190,7 +191,7 @@ def transcribe(
             device=device,
         )
 
-        transcript = pipe(audio, return_timestamps=True)
+        transcript = pipe(audio, return_timestamps="word" if word_timestamps else True)
 
         segments = []
         for chunk in transcript['chunks']:

diff --git a/buzz/widgets/transcriber/file_transcription_form_widget.py b/buzz/widgets/transcriber/file_transcription_form_widget.py
@@ -108,7 +108,6 @@ def on_checkbox_state_changed(state: int):
 
     def reset_word_level_timings(self):
         self.word_level_timings_checkbox.setDisabled(
-            self.transcription_options.model.model_type == ModelType.HUGGING_FACE
-            or self.transcription_options.model.model_type
+            self.transcription_options.model.model_type
             == ModelType.OPEN_AI_WHISPER_API
         )