-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
122 lines (101 loc) · 5.34 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
## pip install --upgrade google-genai==0.2.2 ##
import asyncio
import json
import os
import websockets
from google import genai
import base64
from dotenv import load_dotenv
# Pull environment variables (including the API key) in before the client is built.
load_dotenv()

# Model ID used for every live session; replace with your own model ID.
MODEL = "gemini-2.0-flash-exp"

# Single shared Gen AI client, pinned to the v1alpha API version.
client = genai.Client(http_options={'api_version': 'v1alpha'})
async def gemini_session_handler(client_websocket: websockets.WebSocketServerProtocol):
    """Relay one client websocket session to and from the Gemini live API.

    The first client message must be a JSON object; its optional "setup"
    entry is passed as the config of the Gemini live session. After that,
    two tasks run concurrently: one forwards the client's media chunks to
    Gemini, the other forwards Gemini's text and audio parts to the client.
    As soon as either task finishes, the other one is cancelled so the
    Gemini session is released instead of lingering after a disconnect.

    All errors are printed rather than raised, so a failing session never
    propagates an exception to the websocket server.

    Args:
        client_websocket: The websocket connection to the client.
    """
    # Only these chunk types are forwarded; any other mime type is ignored.
    forwarded_mime_types = ("audio/pcm", "image/jpeg")

    try:
        config_message = await client_websocket.recv()
        config_data = json.loads(config_message)
        config = config_data.get("setup", {})

        async with client.aio.live.connect(model=MODEL, config=config) as session:
            print("Connected to Gemini API")

            async def send_to_gemini():
                """Sends messages from the client websocket to the Gemini API."""
                try:
                    async for message in client_websocket:
                        # A malformed message must not end the relay, so each
                        # message is handled (and its errors reported) alone.
                        try:
                            data = json.loads(message)
                            if "realtime_input" not in data:
                                continue
                            for chunk in data["realtime_input"]["media_chunks"]:
                                mime_type = chunk["mime_type"]
                                if mime_type in forwarded_mime_types:
                                    await session.send(
                                        {"mime_type": mime_type, "data": chunk["data"]}
                                    )
                        except Exception as e:
                            print(f"Error sending to Gemini: {e}")
                    print("Client connection closed (send)")
                except Exception as e:
                    print(f"Error sending to Gemini: {e}")
                finally:
                    print("send_to_gemini closed")

            async def receive_from_gemini():
                """Forwards Gemini responses to the client, turn after turn."""
                try:
                    # session.receive() is re-entered after each pass so the
                    # relay keeps going across turns; only an error ends it.
                    while True:
                        try:
                            print("receiving from gemini")
                            async for response in session.receive():
                                print(f"response: {response}")
                                server_content = response.server_content
                                if server_content is None:
                                    print(f'Unhandled server message! - {response}')
                                    continue

                                model_turn = server_content.model_turn
                                if model_turn:
                                    for part in model_turn.parts:
                                        print(f"part: {part}")
                                        text = getattr(part, "text", None)
                                        inline_data = getattr(part, "inline_data", None)
                                        if text is not None:
                                            await client_websocket.send(
                                                json.dumps({"text": text})
                                            )
                                        elif inline_data is not None:
                                            print("audio mime_type:", inline_data.mime_type)
                                            # Raw bytes are not JSON-serializable,
                                            # so the payload travels as base64 text.
                                            base64_audio = base64.b64encode(
                                                inline_data.data
                                            ).decode('utf-8')
                                            await client_websocket.send(
                                                json.dumps({"audio": base64_audio})
                                            )
                                            print("audio received")

                                if server_content.turn_complete:
                                    print('\n<Turn complete>')
                        except websockets.exceptions.ConnectionClosedOK:
                            print("Client connection closed normally (receive)")
                            break  # The client is gone; stop relaying.
                        except Exception as e:
                            print(f"Error receiving from Gemini: {e}")
                            break  # The session is unusable; stop relaying.
                except Exception as e:
                    print(f"Error receiving from Gemini: {e}")
                finally:
                    print("Gemini connection closed (receive)")

            relay_tasks = [
                asyncio.create_task(send_to_gemini()),
                asyncio.create_task(receive_from_gemini()),
            ]
            try:
                # Either direction ending means the session is over: waiting
                # for both would keep the receive loop (and the Gemini
                # session) alive long after the client has disconnected.
                await asyncio.wait(relay_tasks, return_when=asyncio.FIRST_COMPLETED)
            finally:
                # cancel() is a no-op on a finished task. Draining with
                # return_exceptions=True absorbs the resulting CancelledError.
                for task in relay_tasks:
                    task.cancel()
                await asyncio.gather(*relay_tasks, return_exceptions=True)
    except Exception as e:
        print(f"Error in Gemini session: {e}")
    finally:
        print("Gemini session closed.")
async def main() -> None:
    """Serve websocket clients on localhost:9080 until the process is stopped."""
    server = websockets.serve(gemini_session_handler, "localhost", 9080)
    async with server:
        print("Running websocket server localhost:9080...")
        # This future is never resolved, so awaiting it blocks forever and
        # keeps the server context open.
        forever = asyncio.Future()
        await forever
# Start the websocket server only when this file is run as a script.
if __name__ == "__main__":
    asyncio.run(main())