-
Notifications
You must be signed in to change notification settings - Fork 0
/
whisper.py
27 lines (20 loc) · 1.04 KB
/
whisper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import soundfile as sf
def transcribe_finnish(audio_file):
# Load model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
forced_decoder_ids = processor.get_decoder_prompt_ids(language="finnish", task="transcribe")
# Load audio file
speech, sampling_rate = sf.read(audio_file)
# Process the audio file
input_features = processor(speech, sampling_rate=sampling_rate, return_tensors="pt").input_features
# Generate token ids
predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
# Decode token ids to text
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
return transcription
if __name__ == "__main__":
audio_file = 'suomi.wav' # Path to your audio file
result = transcribe_finnish(audio_file)
print("Transcription:", result)