Skip to content

Commit

Permalink
elevenlabs: error on non-PCM data
Browse files Browse the repository at this point in the history
  • Loading branch information
nbsp authored Aug 3, 2024
1 parent 88e75d7 commit 297db92
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 11 deletions.
5 changes: 5 additions & 0 deletions .changeset/violet-students-shout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-plugins-elevenlabs": patch
---

Gracefully handle non-PCM responses: decode MP3 output, and log an error and stop when ElevenLabs returns non-audio data.
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def __init__(
) -> None:
super().__init__()
self._text, self._opts, self._session = text, opts, session
if _encoding_from_format(self._opts.encoding) == "mp3":
self._mp3_decoder = utils.codecs.Mp3StreamDecoder()

@utils.log_exceptions(logger=logger)
async def _main_task(self) -> None:
Expand All @@ -181,21 +183,39 @@ async def _main_task(self) -> None:
headers={AUTHORIZATION_HEADER: self._opts.api_key},
json=data,
) as resp:
async for bytes_data, _ in resp.content.iter_chunks():
for frame in bstream.write(bytes_data):
if not resp.content_type.startswith("audio/"):
content = await resp.text()
logger.error("11labs returned non-audio data: %s", content)
return
encoding = _encoding_from_format(self._opts.encoding)
if encoding == "mp3":
async for bytes_data, _ in resp.content.iter_chunks():
for frame in self._mp3_decoder.decode_chunk(bytes_data):
self._event_ch.send_nowait(
tts.SynthesizedAudio(
request_id=request_id,
segment_id=segment_id,
frame=frame,
)
)
else:
async for bytes_data, _ in resp.content.iter_chunks():
for frame in bstream.write(bytes_data):
self._event_ch.send_nowait(
tts.SynthesizedAudio(
request_id=request_id,
segment_id=segment_id,
frame=frame,
)
)

for frame in bstream.flush():
self._event_ch.send_nowait(
tts.SynthesizedAudio(
request_id=request_id, segment_id=segment_id, frame=frame
)
)

for frame in bstream.flush():
self._event_ch.send_nowait(
tts.SynthesizedAudio(
request_id=request_id, segment_id=segment_id, frame=frame
)
)


class SynthesizeStream(tts.SynthesizeStream):
"""Streamed API using websockets"""
Expand Down Expand Up @@ -388,11 +408,11 @@ def _synthesize_url(opts: _TTSOptions) -> str:
base_url = opts.base_url
voice_id = opts.voice.id
model_id = opts.model_id
sample_rate = _sample_rate_from_format(opts.encoding)
output_format = opts.encoding
latency = opts.streaming_latency
return (
f"{base_url}/text-to-speech/{voice_id}/stream?"
f"model_id={model_id}&output_format=pcm_{sample_rate}&optimize_streaming_latency={latency}"
f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}"
)


Expand Down

0 comments on commit 297db92

Please sign in to comment.