main.py
import os

import uvicorn
import gradio as gr
from fastapi import FastAPI
from pydantic import BaseModel
from dotenv import load_dotenv

from utils.inference import predict_rag

# Load UVICORN_HOST / UVICORN_PORT (and any API keys) from a local .env file.
load_dotenv()

app = FastAPI()


class Request(BaseModel):
    prompt: str


class Response(BaseModel):
    response: str


@app.post("/", response_model=Response)
async def predict_api(request: Request) -> Response:
    # Run the RAG pipeline on the submitted prompt and wrap the answer
    # in the declared response model so FastAPI can validate it.
    answer = predict_rag(request.prompt)
    return Response(response=answer)
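
# A minimal sketch of calling the JSON endpoint with curl, assuming the
# server is reachable on localhost:8000 (the real host/port come from .env):
#
#   curl -X POST http://localhost:8000/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What does this app answer?"}'
#
# The reply matches the Response model: {"response": "..."}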
# gr.ChatInterface invokes fn(message, history); predict_rag is called with a
# single prompt in the API route above, so adapt the signature here and
# ignore the chat history.
demo = gr.ChatInterface(
    fn=lambda message, history: predict_rag(message),
    textbox=gr.Textbox(
        placeholder="Ask a question", container=False, lines=1, scale=8
    ),
    title="LLM App",
    undo_btn="Delete Previous",
    clear_btn="Clear",
)

# Mount the Gradio UI on the FastAPI app at the root path. The POST route is
# registered before the mount, so API requests to "/" still reach it.
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    uvicorn.run(
        app="main:app",
        # Host/port come from .env; the fallbacks are assumed defaults
        # for local runs, not values from the original repo.
        host=os.getenv("UVICORN_HOST", "0.0.0.0"),
        port=int(os.getenv("UVICORN_PORT", "8000")),
    )
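
# A sketch of the .env file this script expects; the values below are
# illustrative assumptions, not part of the original repository:
#
#   UVICORN_HOST=0.0.0.0
#   UVICORN_PORT=8000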