Merge branch 'main' of github.com:kurianbenoy/Indic-Subtitler

kurianbenoy · Feb 20, 2024 · 3d6d224
2 parents 3b842c7 + 1cb3cf8
commit 3d6d224
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 11 deletions.
diff --git a/api/server.py b/api/server.py
@@ -1,5 +1,6 @@
 import base64
 import tempfile
+import logging
 from typing import Dict
 from modal import Image, Stub, web_endpoint
 
@@ -97,23 +98,25 @@ def generate_seamlessm4t_speech(item: Dict):
     Returns:
     - Dict: A dictionary containing the status code, message, detected speech chunks, and the translated text.
     """
-    import wave
+    # import wave
     import os
 
     import torch
     import torchaudio
     from pydub import AudioSegment
     from seamless_communication.inference import Translator
 
-    # function to calculate the duration of the input audio clip
-    def get_duration_wave(file_path):
-        with wave.open(file_path, "r") as audio_file:
-            frame_rate = audio_file.getframerate()
-            n_frames = audio_file.getnframes()
-            duration = n_frames / float(frame_rate)
-            return duration
+    # Disabled: the wave module only reads WAV files, so this helper raised errors on mp3 and mp4 input.
+    # # function to calculate the duration of the input audio clip
+    # def get_duration_wave(file_path):
+    #     with wave.open(file_path, "r") as audio_file:
+    #         frame_rate = audio_file.getframerate()
+    #         n_frames = audio_file.getnframes()
+    #         duration = n_frames / float(frame_rate)
+    #         return duration
 
     try:
+        print(f"Payload: {item}")
         USE_ONNX = False
         model, utils = torch.hub.load(
             repo_or_dir="snakers4/silero-vad", model="silero_vad", onnx=USE_ONNX
@@ -130,6 +133,7 @@ def get_duration_wave(file_path):
         # Decode the base64 audio and convert it for processing
         b64 = item["wav_base64"]
         # source_lang = item["source"]
+        print(f"Target_lang: {item.get('target')}")
         target_lang = item["target"]
 
         fname = base64_to_audio_file(b64_contents=b64)
@@ -156,8 +160,8 @@ def get_duration_wave(file_path):
             dtype=torch.float16,
         )
 
-        duration = get_duration_wave(fname)
-        print(f"Duration: {duration:.2f} seconds")
+        # duration = get_duration_wave(fname)
+        # print(f"Duration: {duration:.2f} seconds")
 
         resample_rate = 16000
 
@@ -211,4 +215,5 @@ def get_duration_wave(file_path):
 
     except Exception as e:
         print(e)
+        logging.critical(e, exc_info=True)
         return {"message": "Internal server error", "code": 500}
diff --git a/api/test_api.py b/api/test_api.py
@@ -58,7 +58,7 @@ def audio_file_to_base64(file_path):
 b64 = audio_file_to_base64("/home/kurian/git/Indic-Subtitler/api/mal_sample1.mp4")
 # print(b64)
 print({"wav_base64": b64, "target": "mal"})
-x = requests.post(URL, json={"wav_base64": b64, "source": "mal", "target": "hin"})
+x = requests.post(URL, json={"wav_base64": b64, "target": "mal"})
 print(x)
 res = x.json()