Skip to content

Commit

Permalink
Adding word level timestamps for Huggingface (transformers) whisper (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
raivisdejus authored Nov 1, 2024
1 parent 8a1a967 commit 386c151
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 153 deletions.
1 change: 1 addition & 0 deletions buzz/transcriber/whisper_file_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def transcribe_hugging_face(cls, task: FileTranscriptionTask) -> List[Segment]:
audio=task.file_path,
language=language,
task=task.transcription_options.task.value,
+ word_timestamps=task.transcription_options.word_level_timings,
)
return [
Segment(
Expand Down
3 changes: 2 additions & 1 deletion buzz/transformers_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def transcribe(
audio: Union[str, np.ndarray],
language: str,
task: str,
+ word_timestamps: bool = False,
):
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
Expand Down Expand Up @@ -190,7 +191,7 @@ def transcribe(
device=device,
)

- transcript = pipe(audio, return_timestamps=True)
+ transcript = pipe(audio, return_timestamps="word" if word_timestamps else True)

segments = []
for chunk in transcript['chunks']:
Expand Down
3 changes: 1 addition & 2 deletions buzz/widgets/transcriber/file_transcription_form_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ def on_checkbox_state_changed(state: int):

def reset_word_level_timings(self):
self.word_level_timings_checkbox.setDisabled(
- self.transcription_options.model.model_type == ModelType.HUGGING_FACE
- or self.transcription_options.model.model_type
+ self.transcription_options.model.model_type
== ModelType.OPEN_AI_WHISPER_API
)
Loading

0 comments on commit 386c151

Please sign in to comment.