From 8f840edd31d933230bc191d4a8b5aa5c00b5a4a0 Mon Sep 17 00:00:00 2001 From: Arthur <48595927+ArthurZucker@users.noreply.github.com> Date: Tue, 7 Nov 2023 18:43:26 +0100 Subject: [PATCH] [`Whisper`] Nit converting the tokenizer (#27349) * `nospeech` instead of `nocaption` for the no speech token * oups --- src/transformers/models/whisper/convert_openai_to_hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/whisper/convert_openai_to_hf.py b/src/transformers/models/whisper/convert_openai_to_hf.py index 1d016b5984397c..0db555c4aab278 100755 --- a/src/transformers/models/whisper/convert_openai_to_hf.py +++ b/src/transformers/models/whisper/convert_openai_to_hf.py @@ -235,7 +235,7 @@ def convert_tiktoken_to_hf( "<|transcribe|>", "<|startoflm|>", "<|startofprev|>", - "<|nocaptions|>", + "<|nospeech|>", "<|notimestamps|>", ] # these are special tokens, not normalized