Skip to content

Commit

Permalink
Merge branch 'main' of github.com:kurianbenoy/Indic-Subtitler
Browse files: browse the repository at this point in the history
  • Loading branch information
BodaNabeel committed Feb 20, 2024
2 parents 3b842c7 + 1cb3cf8 commit 3d6d224
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
25 changes: 15 additions & 10 deletions api/server.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import tempfile
import logging
from typing import Dict
from modal import Image, Stub, web_endpoint

Expand Down Expand Up @@ -97,23 +98,25 @@ def generate_seamlessm4t_speech(item: Dict):
Returns:
- Dict: A dictionary containing the status code, message, detected speech chunks, and the translated text.
"""
import wave
# import wave
import os

import torch
import torchaudio
from pydub import AudioSegment
from seamless_communication.inference import Translator

# function to calculate the duration of the input audio clip
def get_duration_wave(file_path):
with wave.open(file_path, "r") as audio_file:
frame_rate = audio_file.getframerate()
n_frames = audio_file.getnframes()
duration = n_frames / float(frame_rate)
return duration
# Removed: the wave module only reads WAV files, so this helper raised errors on mp3 and mp4 input
# # function to calculate the duration of the input audio clip
# def get_duration_wave(file_path):
# with wave.open(file_path, "r") as audio_file:
# frame_rate = audio_file.getframerate()
# n_frames = audio_file.getnframes()
# duration = n_frames / float(frame_rate)
# return duration

try:
print(f"Payload: {item}")
USE_ONNX = False
model, utils = torch.hub.load(
repo_or_dir="snakers4/silero-vad", model="silero_vad", onnx=USE_ONNX
Expand All @@ -130,6 +133,7 @@ def get_duration_wave(file_path):
# Decode the base64 audio and convert it for processing
b64 = item["wav_base64"]
# source_lang = item["source"]
print(f"Target_lang: {item.get('target')}")
target_lang = item["target"]

fname = base64_to_audio_file(b64_contents=b64)
Expand All @@ -156,8 +160,8 @@ def get_duration_wave(file_path):
dtype=torch.float16,
)

duration = get_duration_wave(fname)
print(f"Duration: {duration:.2f} seconds")
# duration = get_duration_wave(fname)
# print(f"Duration: {duration:.2f} seconds")

resample_rate = 16000

Expand Down Expand Up @@ -211,4 +215,5 @@ def get_duration_wave(file_path):

except Exception as e:
print(e)
logging.critical(e, exc_info=True)
return {"message": "Internal server error", "code": 500}
2 changes: 1 addition & 1 deletion api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def audio_file_to_base64(file_path):
b64 = audio_file_to_base64("/home/kurian/git/Indic-Subtitler/api/mal_sample1.mp4")
# print(b64)
print({"wav_base64": b64, "target": "mal"})
x = requests.post(URL, json={"wav_base64": b64, "source": "mal", "target": "hin"})
x = requests.post(URL, json={"wav_base64": b64, "target": "mal"})
print(x)
res = x.json()

Expand Down

0 comments on commit 3d6d224

Please sign in to comment.