-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.py
217 lines (183 loc) · 7.28 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from __future__ import annotations
import asyncio
import logging
from dotenv import load_dotenv
import json
import os
from time import perf_counter
from typing import Annotated
from livekit import rtc, api
from livekit.agents import (
AutoSubscribe,
JobContext,
JobProcess,
WorkerOptions,
cli,
llm,
)
from livekit.agents.multimodal import MultimodalAgent
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import deepgram, openai, silero
# Load environment variables from a local file. This is optional and only
# used for local development.
load_dotenv(dotenv_path=".env.local")

logger = logging.getLogger("outbound-caller")
logger.setLevel(logging.INFO)

# ID of the SIP trunk used to place outbound calls, read from the environment.
outbound_trunk_id = os.getenv("SIP_OUTBOUND_TRUNK_ID")

# Base system prompt for the agent. Adjacent string literals are concatenated
# verbatim, so every fragment except the last ends with a space to keep the
# sentences from running into each other.
_default_instructions = (
    "You are a scheduling assistant for a dental practice. Your interface with user will be voice. "
    "You will be on a call with a patient who has an upcoming appointment. Your goal is to confirm the appointment details. "
    "As a customer service representative, you will be polite and professional at all times. Allow user to end the conversation."
)
async def entrypoint(ctx: JobContext):
    """Dial the phone number from the job metadata and run the agent on the call.

    Connects to the job's room (audio only), creates a SIP participant to
    place the outbound call, starts the agent, and then polls the
    participant's ``sip.callStatus`` attribute for up to 30 seconds. Returns
    as soon as the call is active; shuts the job down if the user hangs up
    or the call is not answered in time.

    Args:
        ctx: Job context providing the room, the job metadata (used as the
            phone number to dial) and the API client.
    """
    logger.info(f"connecting to room {ctx.room.name}")
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    user_identity = "phone_user"
    # the phone number to dial is provided in the job metadata
    phone_number = ctx.job.metadata
    logger.info(f"dialing {phone_number} to room {ctx.room.name}")

    # look up the user's phone number and appointment details
    # (hard-coded here); the leading space keeps this sentence from fusing
    # with the end of the base instructions
    instructions = (
        _default_instructions
        + " The customer's name is Jayden. His appointment is next Tuesday at 3pm."
    )

    # `create_sip_participant` starts dialing the user
    await ctx.api.sip.create_sip_participant(
        api.CreateSIPParticipantRequest(
            room_name=ctx.room.name,
            sip_trunk_id=outbound_trunk_id,
            sip_call_to=phone_number,
            participant_identity=user_identity,
        )
    )

    # a participant is created as soon as we start dialing
    participant = await ctx.wait_for_participant(identity=user_identity)

    # start the agent, either a VoicePipelineAgent or MultimodalAgent
    # this can be started before the user picks up. The agent will only start
    # speaking once the user answers the call.
    # run_voice_pipeline_agent(ctx, participant, instructions)
    run_multimodal_agent(ctx, participant, instructions)

    # in addition, you can monitor the call status separately
    start_time = perf_counter()
    while perf_counter() - start_time < 30:
        call_status = participant.attributes.get("sip.callStatus")
        if call_status == "active":
            logger.info("user has picked up")
            return
        elif call_status == "automation":
            # if DTMF is used in the `sip_call_to` number, typically used to dial
            # an extension or enter a PIN.
            # during DTMF dialing, the participant will be in the "automation" state
            pass
        elif call_status == "hangup":
            # user hung up, we'll exit the job
            logger.info("user hung up, exiting job")
            break
        await asyncio.sleep(0.1)
    else:
        # the `else` branch runs only when the loop ends without `break`,
        # i.e. the 30 second window elapsed without the call becoming active
        logger.info("session timed out, exiting job")

    ctx.shutdown()
class CallActions(llm.FunctionContext):
    """
    Detect user intent and perform actions
    """

    # NOTE(review): the docstrings and `Annotated` descriptions on the
    # `@llm.ai_callable()` methods below are presumably surfaced to the model
    # as tool descriptions - treat them as runtime text and confirm before
    # rewording.

    def __init__(
        self, *, api: api.LiveKitAPI, participant: rtc.RemoteParticipant, room: rtc.Room
    ):
        """Store the handles needed to act on the current call.

        Args:
            api: API client used to remove the participant from the room.
            participant: The remote participant on the call.
            room: The room the call is taking place in.
        """
        super().__init__()
        self.api = api
        self.participant = participant
        self.room = room

    async def hangup(self):
        """End the call by removing the participant from the room.

        Errors are logged and swallowed on purpose: the user may already
        have hung up, in which case the removal fails harmlessly.
        """
        try:
            await self.api.room.remove_participant(
                api.RoomParticipantIdentity(
                    room=self.room.name,
                    identity=self.participant.identity,
                )
            )
        except Exception as e:
            # it's possible that the user has already hung up, this error can be ignored
            logger.info(f"received error while ending call: {e}")

    @llm.ai_callable()
    async def end_call(self):
        """Called when the user wants to end the call"""
        logger.info(f"ending the call for {self.participant.identity}")
        await self.hangup()

    @llm.ai_callable()
    async def look_up_availability(
        self,
        date: Annotated[str, "The date of the appointment to check availability for"],
    ):
        """Called when the user asks about alternative appointment availability"""
        logger.info(
            f"looking up availability for {self.participant.identity} on {date}"
        )
        # Stand-in for a real lookup. The sleep must be awaited: a bare
        # `asyncio.sleep(3)` only creates a coroutine object that never runs
        # and triggers a "coroutine was never awaited" RuntimeWarning.
        await asyncio.sleep(3)
        return json.dumps(
            {
                "available_times": ["1pm", "2pm", "3pm"],
            }
        )

    @llm.ai_callable()
    async def confirm_appointment(
        self,
        date: Annotated[str, "date of the appointment"],
        time: Annotated[str, "time of the appointment"],
    ):
        """Called when the user confirms their appointment on a specific date. Use this tool only when they are certain about the date and time."""
        logger.info(
            f"confirming appointment for {self.participant.identity} on {date} at {time}"
        )
        # nothing is persisted here; the fixed string is returned as the tool result
        return "reservation confirmed"

    @llm.ai_callable()
    async def detected_answering_machine(self):
        """Called when the call reaches voicemail. Use this tool AFTER you hear the voicemail greeting"""
        logger.info(f"detected answering machine for {self.participant.identity}")
        await self.hangup()
def run_voice_pipeline_agent(
    ctx: JobContext, participant: rtc.RemoteParticipant, instructions: str
):
    """Start a VoicePipelineAgent for ``participant`` in the job's room.

    The pipeline uses the VAD stored in the process userdata, Deepgram STT
    with the ``nova-2-phonecall`` model, and OpenAI for the LLM and TTS.
    ``instructions`` becomes the system message of the initial chat context.
    """
    logger.info("starting voice pipeline agent")

    # seed the conversation with the system prompt
    system_ctx = llm.ChatContext().append(role="system", text=instructions)

    # build each pipeline stage up front, in the same order as the
    # constructor arguments below
    vad_model = ctx.proc.userdata["vad"]
    speech_to_text = deepgram.STT(model="nova-2-phonecall")
    language_model = openai.LLM()
    text_to_speech = openai.TTS()
    call_actions = CallActions(api=ctx.api, participant=participant, room=ctx.room)

    voice_agent = VoicePipelineAgent(
        vad=vad_model,
        stt=speech_to_text,
        llm=language_model,
        tts=text_to_speech,
        chat_ctx=system_ctx,
        fnc_ctx=call_actions,
    )
    voice_agent.start(ctx.room, participant)
def run_multimodal_agent(
    ctx: JobContext, participant: rtc.RemoteParticipant, instructions: str
):
    """Start a MultimodalAgent for ``participant`` in the job's room.

    The agent is backed by an OpenAI realtime model configured with
    ``instructions`` and the audio and text modalities, and is given a
    ``CallActions`` function context bound to this call.
    """
    logger.info("starting multimodal agent")

    realtime_model = openai.realtime.RealtimeModel(
        instructions=instructions,
        modalities=["audio", "text"],
    )
    call_actions = CallActions(api=ctx.api, participant=participant, room=ctx.room)

    multimodal_agent = MultimodalAgent(model=realtime_model, fnc_ctx=call_actions)
    multimodal_agent.start(ctx.room, participant)
def prewarm(proc: JobProcess):
    """Load the Silero VAD model and stash it in the process userdata under ``"vad"``."""
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model
if __name__ == "__main__":
    # Fail fast before starting the worker. The two failure modes get separate
    # messages so a present-but-malformed trunk ID is not reported as missing.
    if not outbound_trunk_id:
        raise ValueError(
            "SIP_OUTBOUND_TRUNK_ID is not set. Please follow the guide at https://docs.livekit.io/agents/quickstarts/outbound-calls/ to set it up."
        )
    if not outbound_trunk_id.startswith("ST_"):
        raise ValueError(
            "SIP_OUTBOUND_TRUNK_ID is set but does not start with 'ST_'. Please follow the guide at https://docs.livekit.io/agents/quickstarts/outbound-calls/ to set it up."
        )
    cli.run_app(
        WorkerOptions(
            entrypoint_fnc=entrypoint,
            # giving this agent a name will allow us to dispatch it via API
            # automatic dispatch is disabled when `agent_name` is set
            agent_name="outbound-caller",
            # prewarm by loading the VAD model, needed only for VoicePipelineAgent
            prewarm_fnc=prewarm,
        )
    )