-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech_analyzer.py
73 lines (56 loc) · 2.22 KB
/
speech_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import torch
import os
import gradio as gr
#from langchain.llms import OpenAI
from langchain.llms import HuggingFaceHub
from transformers import pipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
# --- watsonx.ai model configuration ---------------------------------------
# NOTE(review): only the service URL is supplied here (no API key) --
# presumably the hosting environment provides authentication; confirm
# before running this elsewhere.
my_credentials = dict(url="https://us-south.ml.cloud.ibm.com")

# Text-generation settings passed to the model.
params = {}
# Upper bound on the number of tokens generated in a single run.
params[GenParams.MAX_NEW_TOKENS] = 800
# Sampling randomness: lower values make the generation more deterministic,
# higher values introduce more randomness.
params[GenParams.TEMPERATURE] = 0.1

# Foundation model handle, wrapped so LangChain can use it as an LLM.
LLAMA2_model = Model(
    model_id='meta-llama/llama-2-70b-chat',
    project_id="skills-network",
    credentials=my_credentials,
    params=params,
)
llm = WatsonxLLM(LLAMA2_model)
#######------------- Prompt Template-------------####
# Llama 2 chat layout: the system instruction goes inside <<SYS>> ... <</SYS>>,
# and that system block together with the user message is wrapped by a single
# [INST] ... [/INST] pair. The previous template nested [INST] inside <<SYS>>,
# which does not match the format the chat model expects.
temp = """
<s>[INST] <<SYS>>
List the key points with details from the context:
<</SYS>>

The context : {context} [/INST]
"""

# The template takes one variable, ``context`` (the text to summarise).
pt = PromptTemplate(
    input_variables=["context"],
    template=temp,
)

# Chain that fills the template and sends the resulting prompt to ``llm``.
prompt_to_LLAMA2 = LLMChain(llm=llm, prompt=pt)
#######------------- Speech2text-------------####
# Speech-recognition pipeline, created on first use and then reused so the
# Whisper model is not reloaded for every request.
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the shared speech-recognition pipeline, building it on first call."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny.en",
            chunk_length_s=30,
        )
    return _asr_pipeline


def transcript_audio(audio_file):
    """Transcribe an audio file and return the LLM's key points for it.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty value means no audio was supplied.

    Returns:
        The text produced by ``prompt_to_LLAMA2`` for the transcript, or a
        short explanatory message when no audio was supplied or the
        transcript came back blank.
    """
    # Guard: submitting without an upload would otherwise crash the pipeline.
    if not audio_file:
        return "No audio file was provided. Please upload an audio file."

    # Transcribe the audio file.
    transcript_txt = _get_asr_pipeline()(audio_file, batch_size=8)["text"]

    # Do not ask the LLM to summarise an empty context.
    if not transcript_txt.strip():
        return "No speech could be transcribed from the audio file."

    return prompt_to_LLAMA2.run(transcript_txt)
#######------------- Gradio-------------####
# Input widget: an uploaded audio clip, handed to the callback as a file path.
audio_input = gr.Audio(type="filepath", sources="upload")
# Output widget: a text box that shows the callback's return value.
output_text = gr.Textbox()

# Wire the widgets to ``transcript_audio``.
iface = gr.Interface(
    title="Audio Transcription App",
    description="Upload the audio file",
    fn=transcript_audio,
    inputs=audio_input,
    outputs=output_text,
)

# Start the web server on host 0.0.0.0, port 7860.
iface.launch(server_port=7860, server_name="0.0.0.0")