diff --git a/agent/agent.py b/agent/agent.py index aae2c8a..2e359bf 100644 --- a/agent/agent.py +++ b/agent/agent.py @@ -1,12 +1,26 @@ import asyncio +import logging from dotenv import load_dotenv from livekit import rtc -from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli, llm +from livekit.agents import ( + AutoSubscribe, + JobContext, + JobProcess, + WorkerOptions, + cli, + llm, +) from livekit.agents.voice_assistant import VoiceAssistant from livekit.plugins import deepgram, openai, silero + load_dotenv(dotenv_path=".env.local") +logger = logging.getLogger("voice-assistant") + + +def prewarm(proc: JobProcess): + proc.userdata["vad"] = silero.VAD.load() async def entrypoint(ctx: JobContext): @@ -22,35 +36,29 @@ async def entrypoint(ctx: JobContext): ), ) + logger.info(f"connecting to room {ctx.room.name}") await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY) + # wait for the first participant to connect + participant = await ctx.wait_for_participant() + logger.info(f"starting voice assistant for participant {participant.identity}") + + dg_model = "nova-2-general" + if participant.kind == rtc.ParticipantKind.PARTICIPANT_KIND_SIP: + # use a model optimized for telephony + dg_model = "nova-2-phonecall" + assistant = VoiceAssistant( - vad=silero.VAD.load(), - stt=deepgram.STT(), + vad=ctx.proc.userdata["vad"], + stt=deepgram.STT(model=dg_model), llm=openai.LLM(), tts=openai.TTS(), chat_ctx=initial_ctx, ) - assistant.start(ctx.room) - - # listen to incoming chat messages, only required if you'd like the agent to - # answer incoming messages from Chat - chat = rtc.ChatManager(ctx.room) - - async def answer_from_text(txt: str): - chat_ctx = assistant.chat_ctx.copy() - chat_ctx.append(role="user", text=txt) - stream = assistant.llm.chat(chat_ctx=chat_ctx) - await assistant.say(stream) - - @chat.on("message_received") - def on_chat_received(msg: rtc.ChatMessage): - if msg.message: - asyncio.create_task(answer_from_text(msg.message)) - await asyncio.sleep(1) + assistant.start(ctx.room, participant) await assistant.say("Hey, how can I help you today?", allow_interruptions=True) if __name__ == "__main__": - cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint)) + cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm)) diff --git a/agent/requirements.txt b/agent/requirements.txt index a176044..bc7f6ab 100644 --- a/agent/requirements.txt +++ b/agent/requirements.txt @@ -1,5 +1,6 @@ -livekit-agents>=0.8.5 -livekit-plugins-openai>=0.8.0 -livekit-plugins-deepgram>=0.6.4 -livekit-plugins-silero>=0.6.3 +livekit-agents>=0.8.12 +livekit-plugins-openai>=0.8.3 +livekit-plugins-deepgram>=0.6.7 +livekit-plugins-silero>=0.6.4 python-dotenv~=1.0 +aiofile~=3.8.8 diff --git a/taskfile.yaml b/taskfile.yaml index 28d172c..b3f8b5c 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -26,6 +26,7 @@ tasks: install_agent: dir: "agent" + interactive: true cmds: - "python3 -m venv venv" - cmd: "source venv/bin/activate" @@ -51,11 +52,13 @@ tasks: dev_ui: dir: "frontend" + interactive: true cmds: - "pnpm dev" dev_agent: dir: "agent" + interactive: true cmds: - cmd: "source venv/bin/activate" platforms: