Skip to content

Commit

Permalink
fix(error): added more specific errors (#491)
Browse files: browse the repository at this point in the history
Signed-off-by: Abhiuday <[email protected]>
  • Loading branch information
aeswibon authored Apr 4, 2024
1 parent 9533035 commit ee6d681
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 64 deletions.
4 changes: 2 additions & 2 deletions ayushma/utils/converse.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,9 +118,9 @@ def converse_api(
# store time to complete request
stats["start_time"] = time.time()
if converse_type == "audio" and not audio:
return Exception("Please provide audio to generate transcript")
return Exception("[Converse] Please provide audio to generate transcript")
if converse_type == "text" and not text:
return Exception("Please provide text to generate transcript")
return Exception("[Converse] Please provide text to generate transcript")

if converse_type == "audio":
stats["transcript_start_time"] = time.time()
Expand Down
13 changes: 7 additions & 6 deletions ayushma/utils/language_helpers.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,8 +16,8 @@ def translate_text(target, text):
result = translate_client.translate(text, target_language=target)
return result["translatedText"]
except Exception as e:
print(e)
raise APIException("Translation failed")
print(f"Translation failed: {e}")
raise APIException("[Translation] Failed to translate the text")


language_code_voice_map = {
Expand Down Expand Up @@ -55,7 +55,8 @@ def text_to_speech(text, language_code, service):
synthesis_input = texttospeech.SynthesisInput(text=text)

voice = texttospeech.VoiceSelectionParams(
language_code=language_code, name=language_code_voice_map[language_code]
language_code=language_code,
name=language_code_voice_map[language_code],
)
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
Expand All @@ -77,7 +78,7 @@ def text_to_speech(text, language_code, service):
)
return response.read()
else:
raise APIException("Service not supported")
raise APIException("[Text to Speech] Service not supported.")
except Exception as e:
print(e)
return None
print(f"Failed to convert text to speech: {e}")
raise APIException("[Text to Speech] Failed to convert text to speech.")
36 changes: 24 additions & 12 deletions ayushma/utils/openaiapi.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_sanitized_reference(pinecone_references: List[QueryResponse]) -> str:
else:
sanitized_reference[document_id] = text
except Exception as e:
print(e)
print(f"Error extracting reference: {e}")
pass

return json.dumps(sanitized_reference)
Expand Down Expand Up @@ -143,17 +143,17 @@ def get_reference(text, openai_key, namespace, top_k):
try:
embeddings.append(get_embedding(text=[text], openai_api_key=openai_key))
except Exception as e:
return Exception(
e.__str__(),
)
print(f"Error generating embeddings: {e}")
return Exception("[Reference] Error generating embeddings")
else:
parts = split_text(text)
for part in parts:
try:
embeddings.append(get_embedding(text=[part], openai_api_key=openai_key))
except Exception as e:
print(f"Error generating embeddings: {e}")
raise Exception(
e.__str__(),
"[Reference] Error generating embeddings for split text"
)
# find similar embeddings from pinecone index for each embedding
pinecone_references: List[QueryResponse] = []
Expand Down Expand Up @@ -187,7 +187,7 @@ def add_reference_documents(chat_message):
except Document.DoesNotExist:
pass
except Exception as e:
print("Error adding reference documents: ", e)
print(f"Error adding reference documents: {e}")

chat_message.original_message = chat_text[
:ref_start_idx
Expand Down Expand Up @@ -297,10 +297,13 @@ def converse(
elif fetch_references and chat.project and chat.project.external_id:
try:
reference = get_reference(
english_text, openai_key, str(chat.project.external_id), match_number
english_text,
openai_key,
str(chat.project.external_id),
match_number,
)
except Exception as e:
print(e)
print(f"Error fetching references: {e}")
reference = ""
else:
reference = ""
Expand Down Expand Up @@ -438,8 +441,10 @@ def converse(
None,
)
except Exception as e:
print(e)
error_text = str(e)
print(f"Error in streaming response: {e}")
error_text = (
"[Streaming] Something went wrong in getting response, stream stopped"
)
translated_error_text = error_text
if user_language != "en-IN":
translated_error_text = translate_text(user_language, error_text)
Expand All @@ -464,7 +469,13 @@ def converse(
},
)
yield create_json_response(
local_translated_text, chat.external_id, "", str(e), True, True, None
local_translated_text,
chat.external_id,
"",
str(e),
True,
True,
None,
)


Expand Down Expand Up @@ -499,7 +510,8 @@ def converse_thread(
if status == "completed":
break
else:
raise Exception("Thread timed out")
print("[Thread] Run did not complete, timed out")
raise Exception("[Thread] Run did not complete, timed out")

response = (
client.beta.threads.messages.list(thread_id=thread.thread_id)
Expand Down
101 changes: 60 additions & 41 deletions ayushma/utils/speech_to_text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,15 +14,21 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
client = OpenAI(api_key=self.api_key)
transcription = client.audio.transcriptions.create(
model="whisper-1",
# https://github.com/openai/openai-python/tree/main#file-uploads
file=(audio.name, audio.read()),
language=self.language_code.replace("-IN", ""),
# api_version="2020-11-07",
)
return transcription.text
try:
client = OpenAI(api_key=self.api_key)
transcription = client.audio.transcriptions.create(
model="whisper-1",
# https://github.com/openai/openai-python/tree/main#file-uploads
file=(audio.name, audio.read()),
language=self.language_code.replace("-IN", ""),
# api_version="2020-11-07",
)
return transcription.text
except Exception as e:
print(f"Failed to recognize speech with whisper engine: {e}")
raise ValueError(
"[Speech to Text] Failed to recognize speech with Whisper engine"
)


class GoogleEngine:
Expand All @@ -31,41 +37,52 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
client = speech.SpeechClient()
audio_content = audio.file.read()
audio_data = speech.RecognitionAudio(content=audio_content)

config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
language_code=self.language_code,
)

response = client.recognize(config=config, audio=audio_data)
if not response.results:
return ""
return response.results[0].alternatives[0].transcript
try:
client = speech.SpeechClient()
audio_content = audio.file.read()
audio_data = speech.RecognitionAudio(content=audio_content)

config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
language_code=self.language_code,
)

response = client.recognize(config=config, audio=audio_data)
if not response.results:
return ""
return response.results[0].alternatives[0].transcript
except Exception as e:
print(f"Failed to recognize speech with google engine: {e}")
raise ValueError(
"[Speech to Text] Failed to recognize speech with Google engine"
)


class SelfHostedEngine:
def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
response = requests.post(
settings.SELF_HOSTED_ENDPOINT,
files={"audio": audio},
data={
# change this model to get faster results see: https://github.com/coronasafe/care-whisper
"model": "small",
"language": self.language_code.replace("-IN", ""),
},
)

if not response.ok:
print("Failed to recognize speech with self hosted engine")
return ""
response = response.json()
return response["data"]["transcription"].strip()
try:
response = requests.post(
settings.SELF_HOSTED_ENDPOINT,
files={"audio": audio},
data={
# change this model to get faster results see: https://github.com/coronasafe/care-whisper
"model": "small",
"language": self.language_code.replace("-IN", ""),
},
)

if not response.ok:
print("Failed to recognize speech with self hosted engine")
return ""
response = response.json()
return response["data"]["transcription"].strip()
except Exception as e:
raise ValueError(
"[Speech to Text] Failed to recognize speech with Self Hosted engine"
)


engines = {
Expand All @@ -82,14 +99,16 @@ def speech_to_text(engine_id, audio, language_code):
engine_class = engines.get(engine_name)

if not engine_class:
raise ValueError(f"Invalid STT engine ID: {engine_id}")
raise ValueError(f"[Speech to Text] Engine with ID {engine_id} not found")

try:
engine = engine_class(api_key, language_code)
recognized_text = engine.recognize(audio)
if not recognized_text:
raise ValueError("Failed to detect any speech in provided audio")
raise ValueError("[Speech to Text] No text recognized")
return recognized_text
except Exception as e:
print(f"Failed to recognize speech with {engine_name} engine: {e}")
raise e
print(f"Failed to transcribe speech with {engine_name} engine: {e}")
raise ValueError(
f"[Speech to Text] Failed to transcribe speech with {engine_name} engine"
)
7 changes: 5 additions & 2 deletions ayushma/utils/upsert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,7 +58,8 @@ def upsert(
None
"""
pinecone.init(
api_key=settings.PINECONE_API_KEY, environment=settings.PINECONE_ENVIRONMENT
api_key=settings.PINECONE_API_KEY,
environment=settings.PINECONE_ENVIRONMENT,
)
print("Initialized Pinecone and OpenAI")

Expand All @@ -80,7 +81,9 @@ def upsert(
raise Exception("Either filepath, url or text must be provided")

if len(document_lines) == 0:
raise Exception("No text found in document")
raise Exception(
"[Upsert] No text found in the document. Please check the document."
)
print(document_lines)

batch_size = (
Expand Down
2 changes: 1 addition & 1 deletion ayushma/views/chat.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -134,7 +134,7 @@ def speech_to_text(self, *args, **kwarg):
print(f"Failed to transcribe speech with {stt_engine} engine: {e}")
return Response(
{
"error": "Something went wrong in getting transcription, please try again later"
"error": "[Transcribing] Something went wrong in getting transcription, please try again later"
},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
Expand Down

0 comments on commit ee6d681

Please sign in to comment.