-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
40 lines (30 loc) · 1.17 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import ollama
from flask import Flask, render_template
from flask_socketio import SocketIO, emit
# Flask application instance; referenced by the route decorators below.
app = Flask(__name__)
# Wrap the Flask app with SocketIO to enable WebSocket event handling.
socketio = SocketIO(app)
# Define the route for the root URL
@app.route("/")
def index():
    """Serve the application's main page at the site root."""
    return render_template("index.html")
# Define the event handler for the "summarize" event
@socketio.on("summarize")
def handle_summarization(json):
    """Stream a model-generated summary of client-submitted text.

    Args:
        json: Payload received with the "summarize" event; expected to be a
            dict containing a "text" key with the text to summarize.

    Emits:
        One "response" event per streamed model chunk, each carrying a
        payload of the form {"content": <partial model output>}.
    """
    # Tolerate a missing or empty "text" key instead of raising KeyError,
    # which would only surface as a server-side traceback and emit nothing.
    text = json.get("text", "")
    if not text:
        return
    # Request a streaming chat completion so output reaches the client
    # incrementally rather than after the full response is generated.
    stream = ollama.chat(
        model="llama-nor:latest",  # custom local Llama model
        messages=[{"role": "user", "content": text}],
        stream=True,
    )
    # Forward each partial chunk back to the requesting client as it arrives.
    for chunk in stream:
        emit("response", {"content": chunk["message"]["content"]})
# Run the Flask application with SocketIO support
# Entry point: start the SocketIO-enabled development server.
if __name__ == "__main__":
    # NOTE(review): debug=True enables Flask's reloader/debugger — disable for production.
    socketio.run(app, debug=True)