diff --git a/Inference.py b/Inference.py index 38ee7ac..a844d0e 100644 --- a/Inference.py +++ b/Inference.py @@ -8,7 +8,7 @@ ############################################################################################ -model_path = "./ASR_2_1_220.pth" +model_path = "./ASR_2_1_300.pth" ############################################################################################ diff --git a/main.py b/main.py index ac89115..d1d8fe7 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ -import sys import io +import sys +import gdown from fastapi import ( FastAPI, File, @@ -11,13 +12,20 @@ from pydantic import BaseModel from Inference import predict - import tempfile import os from utils.Translation import * app = FastAPI() +# Function to get the model and tokenizer from Google Drive instead of putting it in the repo +def download_file_from_google_drive(file_id, output_path): + url = f'https://drive.google.com/uc?id={file_id}' + gdown.download(url, output_path, quiet=False) + + +download_file_from_google_drive('1wYF0uHMHWdWb6G2XOB6dLQj3LWyz8u5X', './ASR_2_1_300.pth') + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8") # Add CORS middleware @@ -40,16 +48,14 @@ class TranslationRequest(BaseModel): @app.post("/translate/auto") -async def translate(request: TranslationRequest): +async def translateOpenL(request: TranslationRequest): response = translate_openl(request.text) return {"translation": response} @app.post("/translate/en") -async def translate(request: TranslationRequest): - print(request.text) - response = translate(request.text) - print(response) +async def translate_endpoint(request: TranslationRequest): + response = await translate(request.text) return {"translation": response} @@ -64,7 +70,7 @@ async def upload_audio(file: UploadFile = File(...)): # Create a temporary file in the current working directory with tempfile.NamedTemporaryFile( - dir=current_dir, delete=False, suffix=".wav" + dir=current_dir, delete=False, suffix=".wav" ) as tmp_file: tmp_file.write(contents) tmp_file_path = tmp_file.name # Get the path of the temp file diff --git a/requirements.txt b/requirements.txt index f4fc38e..6e4ab38 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ fastapi~=0.115.0 -openai~=1.50.2 -python-dotenv~=1.0.1 numpy~=2.0.2 torch~=2.4.1 uvicorn~=0.31.0 python-multipart~=0.0.12 pydantic~=2.8.2 librosa~=0.10.2.post1 -requests~=2.32.3 \ No newline at end of file +requests~=2.32.3 +gdown~=5.2.0 +tokenizers~=0.20.1 +pandas~=2.2.3 \ No newline at end of file diff --git a/utils/Translation.py b/utils/Translation.py index 3cdb3fb..aacf74a 100644 --- a/utils/Translation.py +++ b/utils/Translation.py @@ -1,32 +1,20 @@ import requests -import sys -import io -import os import re -import numpy as np - -from pathlib import Path import pandas as pd import torch -# import evaluate - from torch import nn -from torch.utils.data import DataLoader, Dataset, Subset - -from tqdm.notebook import tqdm +from torch.utils.data import Dataset from tokenizers import Tokenizer from tokenizers import normalizers from tokenizers import pre_tokenizers -from tokenizers.models import WordLevel, WordPiece +from tokenizers.models import WordLevel from tokenizers.processors import TemplateProcessing from tokenizers.normalizers import NFC, StripAccents, Strip, Lowercase from tokenizers.pre_tokenizers import Whitespace -from tokenizers.trainers import WordLevelTrainer, WordPieceTrainer - -from sklearn.model_selection import train_test_split +from tokenizers.trainers import WordLevelTrainer def translate_openl(txt): @@ -204,13 +192,13 @@ def forward(self, src, src_mask): class Encoder(nn.Module): def __init__( - self, - vocab_size: int, - latent_dim: int, - n_layers: int, - n_heads: int, - pf_dim: int, - dropout: float, + self, + vocab_size: int, + latent_dim: int, + n_layers: int, + n_heads: int, + pf_dim: int, + dropout: float, ): super().__init__() @@ -309,14 +297,14 @@ def forward(self, trg, enc_out, trg_mask, src_mask): class Decoder(nn.Module): def __init__( - self, - vocab_size: int, - latent_dim: int, - n_layers: int, - n_heads: int, - pf_dim: int, - dropout: float, - max_len: int = 100, + self, + vocab_size: int, + latent_dim: int, + n_layers: int, + n_heads: int, + pf_dim: int, + dropout: float, + max_len: int = 100, ): super().__init__() self.latent_dim = latent_dim @@ -373,9 +361,9 @@ def forward(self, trg, enc_out, trg_mask, src_mask): class Seq2Seq(nn.Module): def __init__( - self, - encoder: nn.Module, - decoder: nn.Module, + self, + encoder: nn.Module, + decoder: nn.Module, ): super().__init__() self.encoder = encoder @@ -416,14 +404,13 @@ def forward(self, src, trg, src_mask, trg_mask): def translate_sentence( - sentence: str, - src_tokenizer: Tokenizer, - trg_tokenizer: Tokenizer, - model: nn.Module, - device: torch.device, - max_len: int = 50, + sentence: str, + src_tokenizer: Tokenizer, + trg_tokenizer: Tokenizer, + model: nn.Module, + device: torch.device, + max_len: int = 50, ): - model.eval() # encode sentence @@ -545,5 +532,4 @@ def translate(sentence): # print(translation) return translation - # print(translate("سوف نذهب الي الحديقة الاحد المقبل"))