diff --git a/api/api.py b/api/api.py
index 4c94422..2cdb4cb 100644
--- a/api/api.py
+++ b/api/api.py
@@ -3,10 +3,8 @@
 import tempfile
 import logging
 from typing import Dict
-from modal import Image, Stub, web_endpoint, asgi_app
-from fastapi import FastAPI, Response
+from modal import Image, Stub, web_endpoint
 from fastapi.responses import StreamingResponse
-from fastapi.middleware.cors import CORSMiddleware
 
 # Define the GPU type to be used for processing
 GPU_TYPE = "T4"
@@ -21,9 +19,7 @@ def download_models():
 
     # Define model names for the translator and vocoder
     model_name = "seamlessM4T_v2_large"
-    vocoder_name = (
-        "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
-    )
+    vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
 
     # Initialize the translator model with specified parameters
     Translator(
@@ -105,7 +101,6 @@ async def generate_seamlessm4t_speech(item: Dict):
     - Dict: A dictionary containing the status code, message, detected speech chunks, and the translated text.
     """
     # import wave
-    import os
     import torch
     import torchaudio
 
@@ -148,9 +143,7 @@ async def generate_seamlessm4t_speech(item: Dict):
     print(speech_timestamps_seconds)
     # translator = download_models()
     model_name = "seamlessM4T_v2_large"
-    vocoder_name = (
-        "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
-    )
+    vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
 
     translator = Translator(
         model_name,
@@ -189,9 +182,7 @@ async def generate():
             )
             resampled_waveform = resampler(waveform)
             torchaudio.save("resampled.wav", resampled_waveform, resample_rate)
-            translated_text, _ = translator.predict(
-                "resampled.wav", "s2tt", target_lang
-            )
+            translated_text, _ = translator.predict("resampled.wav", "s2tt", target_lang)
             text.append(str(translated_text[0]))
             # os.remove(new_audio_name)
             # os.remove("resampled.wav")
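
For context, a minimal sketch of the resample-then-translate step that the unwrapped `translator.predict(...)` call above sits in. It assumes a `translator` built as in `download_models()`; `translate_chunk`, `chunk_path`, and `target_lang` are illustrative names, not identifiers from api.py:

```python
import torchaudio


def translate_chunk(translator, chunk_path: str, target_lang: str, resample_rate: int = 16000) -> str:
    """Resample one detected speech chunk and run s2tt (speech-to-text translation).

    Sketch only: `translator` is assumed to be the seamless_communication
    Translator instance created in download_models().
    """
    waveform, sample_rate = torchaudio.load(chunk_path)

    # Resample to the rate the model expects, mirroring the endpoint code above.
    resampler = torchaudio.transforms.Resample(sample_rate, resample_rate)
    resampled_waveform = resampler(waveform)
    torchaudio.save("resampled.wav", resampled_waveform, resample_rate)

    # Same single-line predict call the diff introduces.
    translated_text, _ = translator.predict("resampled.wav", "s2tt", target_lang)
    return str(translated_text[0])
```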