-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Multimodal Agent with complete API (#74)
- Loading branch information
Showing 21 changed files with 1,183 additions and 927 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
"@livekit/agents": minor | ||
"@livekit/agents-plugin-openai": minor | ||
"livekit-agents-examples": patch | ||
--- | ||
|
||
Rename to MultimodalAgent, move to main package |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,15 @@ | ||
{ | ||
"name": "@livekit/agents", | ||
"version": "0.2.0", | ||
"description": "LiveKit Node Agents", | ||
"description": "LiveKit Agents - Node.js", | ||
"main": "dist/index.js", | ||
"types": "dist/index.d.ts", | ||
"author": "aoife cassidy <[email protected]>", | ||
"author": "LiveKit", | ||
"type": "module", | ||
"scripts": { | ||
"build": "tsc", | ||
"clean": "rm -rf dist", | ||
"clean:build": "pnpm clean && pnpm build", | ||
"lint": "eslint -f unix \"src/**/*.ts\"", | ||
"api:check": "api-extractor run --typescript-compiler-folder ../node_modules/typescript", | ||
"api:update": "api-extractor run --local --typescript-compiler-folder ../node_modules/typescript --verbose" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,254 @@ | ||
// SPDX-FileCopyrightText: 2024 LiveKit, Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
import type { AudioFrame } from '@livekit/rtc-node'; | ||
import { type AudioSource } from '@livekit/rtc-node'; | ||
import { EventEmitter } from 'events'; | ||
import { AudioByteStream } from '../audio.js'; | ||
import type { TranscriptionForwarder } from '../transcription.js'; | ||
import { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js'; | ||
|
||
// Exported empty object literal. NOTE(review): nothing in the visible code reads or
// populates it — confirm whether it is a placeholder that is still needed.
export const proto = {}; | ||
|
||
/**
 * Handle for a single playout started by `AgentPlayout.play()`.
 *
 * It exposes how much audio and text has been played so far and lets the
 * caller interrupt the playout before it completes.
 */
export class PlayoutHandle extends EventEmitter {
  #audioSource: AudioSource;
  #sampleRate: number;
  #itemId: string;
  #contentIndex: number;
  /** @internal */
  transcriptionFwd: TranscriptionForwarder;
  /** @internal Resolved by the playout task once playout has finished. */
  doneFut: Future;
  /** @internal Resolved by {@link PlayoutHandle.interrupt}. */
  intFut: Future;
  /** @internal */
  #interrupted: boolean;
  /**
   * @internal
   * Total duration pushed to the audio source so far, accumulated by the
   * playout task as `samplesPerChannel / sampleRate` (i.e. seconds).
   */
  pushedDuration: number;
  /** @internal */
  totalPlayedTime: number | undefined; // Set when playout is done

  /**
   * @param audioSource - source the audio is played through; only its `queuedDuration` is read here
   * @param sampleRate - sample rate used to convert durations into sample counts
   * @param itemId - identifier of the item being played
   * @param contentIndex - index of the content part being played
   * @param transcriptionFwd - forwarder whose `currentCharacterIndex` backs {@link PlayoutHandle.textChars}
   */
  constructor(
    audioSource: AudioSource,
    sampleRate: number,
    itemId: string,
    contentIndex: number,
    transcriptionFwd: TranscriptionForwarder,
  ) {
    super();
    this.#audioSource = audioSource;
    this.#sampleRate = sampleRate;
    this.#itemId = itemId;
    this.#contentIndex = contentIndex;
    this.transcriptionFwd = transcriptionFwd;
    this.doneFut = new Future();
    this.intFut = new Future();
    this.#interrupted = false;
    this.pushedDuration = 0;
    this.totalPlayedTime = undefined;
  }

  /** Identifier of the item being played. */
  get itemId(): string {
    return this.#itemId;
  }

  /**
   * Number of audio samples played so far.
   *
   * Once `totalPlayedTime` has been set it is used directly; until then the
   * played time is estimated as the pushed duration minus whatever is still
   * queued in the audio source.
   */
  get audioSamples(): number {
    if (this.totalPlayedTime !== undefined) {
      return Math.floor(this.totalPlayedTime * this.#sampleRate);
    }

    // The subtraction must happen before scaling by the sample rate; without
    // the parentheses only the queued duration was multiplied, mixing a
    // duration with a sample count.
    // NOTE(review): assumes queuedDuration uses the same unit as pushedDuration — confirm.
    const playedDuration = this.pushedDuration - this.#audioSource.queuedDuration;
    return Math.floor(playedDuration * this.#sampleRate);
  }

  /** Current character index reported by the transcription forwarder. */
  get textChars(): number {
    return this.transcriptionFwd.currentCharacterIndex;
  }

  /** Index of the content part being played. */
  get contentIndex(): number {
    return this.#contentIndex;
  }

  /** Whether {@link PlayoutHandle.interrupt} took effect on this handle. */
  get interrupted(): boolean {
    return this.#interrupted;
  }

  /** True once the playout has finished or has been interrupted. */
  get done(): boolean {
    return this.doneFut.done || this.#interrupted;
  }

  /** Interrupts the playout. Does nothing if the playout already finished. */
  interrupt() {
    if (this.doneFut.done) return;
    // Set the flag before resolving so anything reacting to intFut already
    // observes `interrupted === true`.
    this.#interrupted = true;
    this.intFut.resolve();
  }
}
|
||
/**
 * Plays agent audio through an AudioSource while forwarding the matching text
 * and audio to a TranscriptionForwarder.
 *
 * A single playout task is tracked at a time: each call to play() passes the
 * previous task to the new one, which gracefully cancels it before starting.
 */
export class AgentPlayout { | ||
#audioSource: AudioSource; | ||
#playoutTask: CancellablePromise<void> | null; | ||
#sampleRate: number; | ||
#numChannels: number; | ||
#inFrameSize: number; | ||
#outFrameSize: number; | ||
/**
 * @param audioSource - destination the audio frames are captured into
 * @param sampleRate - sample rate passed to the AudioByteStream and to every PlayoutHandle
 * @param numChannels - channel count passed to the AudioByteStream
 * @param inFrameSize - stored on the instance; not read anywhere in the visible code
 * @param outFrameSize - samples per channel passed to the AudioByteStream
 */
constructor( | ||
audioSource: AudioSource, | ||
sampleRate: number, | ||
numChannels: number, | ||
inFrameSize: number, | ||
outFrameSize: number, | ||
) { | ||
this.#audioSource = audioSource; | ||
this.#playoutTask = null; | ||
this.#sampleRate = sampleRate; | ||
this.#numChannels = numChannels; | ||
this.#inFrameSize = inFrameSize; | ||
this.#outFrameSize = outFrameSize; | ||
} | ||
|
||
/**
 * Starts a new playout and returns its handle immediately.
 *
 * The new task replaces the tracked playout task and receives the previous
 * one so that it can cancel it first.
 *
 * @param itemId - identifier stored on the returned handle
 * @param contentIndex - content index stored on the returned handle
 * @param transcriptionFwd - forwarder that receives the text and audio
 * @param textStream - text chunks to forward
 * @param audioStream - audio frames to play
 * @returns the handle for the playout that was started
 */
play( | ||
itemId: string, | ||
contentIndex: number, | ||
transcriptionFwd: TranscriptionForwarder, | ||
textStream: AsyncIterableQueue<string>, | ||
audioStream: AsyncIterableQueue<AudioFrame>, | ||
): PlayoutHandle { | ||
const handle = new PlayoutHandle( | ||
this.#audioSource, | ||
this.#sampleRate, | ||
itemId, | ||
contentIndex, | ||
transcriptionFwd, | ||
); | ||
this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream); | ||
return handle; | ||
} | ||
|
||
/**
 * Builds the cancellable task that drives one playout: it cancels `oldTask`,
 * runs a text reader and an audio capture task concurrently, waits for the
 * capture to finish or the handle to be interrupted, then performs cleanup.
 */
#makePlayoutTask( | ||
oldTask: CancellablePromise<void> | null, | ||
handle: PlayoutHandle, | ||
textStream: AsyncIterableQueue<string>, | ||
audioStream: AsyncIterableQueue<AudioFrame>, | ||
): CancellablePromise<void> { | ||
return new CancellablePromise<void>((resolve, reject, onCancel) => { | ||
// Cancellation is cooperative: the flag is checked inside the stream loops below.
let cancelled = false; | ||
onCancel(() => { | ||
cancelled = true; | ||
}); | ||
|
||
(async () => { | ||
try { | ||
// Make sure the previous playout has been cancelled before starting this one.
if (oldTask) { | ||
await gracefullyCancel(oldTask); | ||
} | ||
|
||
// Shared by capture() and the cleanup below; stays true until the first audio frame arrives.
let firstFrame = true; | ||
|
||
// Forwards each text chunk to the transcription forwarder until the stream
// ends or either this reader or the enclosing task is cancelled.
const readText = () => | ||
new CancellablePromise<void>((resolveText, rejectText, onCancelText) => { | ||
let cancelledText = false; | ||
onCancelText(() => { | ||
cancelledText = true; | ||
}); | ||
|
||
(async () => { | ||
try { | ||
for await (const text of textStream) { | ||
if (cancelledText || cancelled) { | ||
break; | ||
} | ||
handle.transcriptionFwd.pushText(text); | ||
} | ||
resolveText(); | ||
} catch (error) { | ||
rejectText(error); | ||
} | ||
})(); | ||
}); | ||
|
||
// Pushes incoming audio frames to the transcription forwarder and captures
// the frames produced by the AudioByteStream into the audio source.
const capture = () => | ||
new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => { | ||
let cancelledCapture = false; | ||
onCancelCapture(() => { | ||
cancelledCapture = true; | ||
}); | ||
|
||
(async () => { | ||
try { | ||
// NOTE(review): presumably the byte stream re-chunks input into frames of
// #outFrameSize samples per channel — confirm against AudioByteStream.
const samplesPerChannel = this.#outFrameSize; | ||
const bstream = new AudioByteStream( | ||
this.#sampleRate, | ||
this.#numChannels, | ||
samplesPerChannel, | ||
); | ||
|
||
for await (const frame of audioStream) { | ||
if (cancelledCapture || cancelled) { | ||
break; | ||
} | ||
// Start the transcription forwarder on the first audio frame only.
if (firstFrame) { | ||
handle.transcriptionFwd.start(); | ||
firstFrame = false; | ||
} | ||
|
||
handle.transcriptionFwd.pushAudio(frame); | ||
|
||
for (const f of bstream.write(frame.data.buffer)) { | ||
// Track pushed audio as samples per channel divided by sample rate.
handle.pushedDuration += f.samplesPerChannel / f.sampleRate; | ||
await this.#audioSource.captureFrame(f); | ||
} | ||
} | ||
|
||
// Only when not cancelled: flush what the byte stream still holds, mark the
// audio complete, and wait for the audio source to finish playout.
if (!cancelledCapture && !cancelled) { | ||
for (const f of bstream.flush()) { | ||
handle.pushedDuration += f.samplesPerChannel / f.sampleRate; | ||
await this.#audioSource.captureFrame(f); | ||
} | ||
|
||
handle.transcriptionFwd.markAudioComplete(); | ||
|
||
await this.#audioSource.waitForPlayout(); | ||
} | ||
|
||
resolveCapture(); | ||
} catch (error) { | ||
rejectCapture(error); | ||
} | ||
})(); | ||
}); | ||
|
||
const readTextTask = readText(); | ||
const captureTask = capture(); | ||
|
||
try { | ||
// Wait until capture settles or the handle is interrupted, whichever comes first.
await Promise.race([captureTask, handle.intFut.await]); | ||
} finally { | ||
if (!captureTask.isCancelled) { | ||
await gracefullyCancel(captureTask); | ||
} | ||
|
||
// Played time is what was pushed minus what is still queued in the audio source.
// NOTE(review): assumes queuedDuration uses the same unit as pushedDuration — confirm.
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration; | ||
|
||
if (handle.interrupted || captureTask.error) { | ||
this.#audioSource.clearQueue(); // make sure to remove any queued frames | ||
} | ||
|
||
if (!readTextTask.isCancelled) { | ||
await gracefullyCancel(readTextTask); | ||
} | ||
|
||
// Text is marked complete only if audio actually started and playout was not interrupted.
if (!firstFrame && !handle.interrupted) { | ||
handle.transcriptionFwd.markTextComplete(); | ||
} | ||
|
||
// The handle is marked done before the transcription forwarder is closed.
handle.doneFut.resolve(); | ||
await handle.transcriptionFwd.close(handle.interrupted); | ||
} | ||
|
||
resolve(); | ||
} catch (error) { | ||
reject(error); | ||
} | ||
})(); | ||
}); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// SPDX-FileCopyrightText: 2024 LiveKit, Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
export * from './multimodal_agent.js'; | ||
export * from './agent_playout.js'; |
Oops, something went wrong.