Skip to content

Commit

Permalink
Update speaker context handling to use safe references (partial fix for incompatible plugins)
Browse files Browse the repository at this point in the history

Update `speak` message handling to use `Message.data['lang']` instead of hard-coded default
Updated logging and dependencies
  • Loading branch information
NeonDaniel committed Aug 16, 2023
1 parent 5ffd36c commit fc2697c
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 24 deletions.
5 changes: 3 additions & 2 deletions neon_audio/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,16 @@ def handle_get_tts(self, message):
ident = message.context.get("ident") or "neon.get_tts.response"
LOG.info(f"Handling TTS request: {ident}")
if not message.data.get("speaker"):
LOG.warning(f"No speaker data with request, "
f"core defaults will be used.")
LOG.info(f"No speaker data with request, "
f"core defaults will be used.")
if text:
if not isinstance(text, str):
self.bus.emit(message.reply(
ident, data={"error": f"text is not a str: {text}"}))
return
try:
responses = self.tts.get_multiple_tts(message)
LOG.debug(f"Emitting response: {responses}")
self.bus.emit(message.reply(ident, data=responses))
except Exception as e:
LOG.exception(e)
Expand Down
43 changes: 23 additions & 20 deletions neon_audio/tts/neon.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ def get_requested_tts_languages(msg) -> list:
# If speaker data is present, use it
if msg.data.get("speaker"):
speaker = msg.data.get("speaker")
tts_reqs.append({"speaker": speaker["name"],
"language": speaker["language"],
"gender": speaker["gender"],
tts_reqs.append({"speaker": speaker.get("name", "Neon"),
"language": speaker.get("language",
msg.data.get("lang")),
"gender": speaker.get("gender", default_gender),
"voice": speaker.get("voice")
})
LOG.debug(f">>> speaker={speaker}")
LOG.info(f">>> speaker={speaker}")

# If multiple profiles attached to message, get TTS for all
elif profiles:
Expand Down Expand Up @@ -245,42 +246,44 @@ def get_multiple_tts(self, message, **kwargs) -> dict:
LOG.debug(f"tts_requested={tts_requested}")
sentence = message.data["text"]
sentence = self.validate_ssml(sentence)
skill_lang = message.data.get('lang') or self.lang
LOG.debug(f"utterance_lang={skill_lang}")
responses = {}
for request in tts_requested:
lang = kwargs["lang"] = request["language"]
tts_lang = kwargs["lang"] = request["language"]
# Check if requested tts lang matches internal (text) lang
# TODO: `self.lang` should come from the incoming message
if lang.split("-")[0] != self.lang.split("-")[0]:
self.cached_translations.setdefault(lang, {})
if tts_lang.split("-")[0] != skill_lang.split("-")[0]:
self.cached_translations.setdefault(tts_lang, {})

tx_sentence = self.cached_translations[lang].get(sentence)
tx_sentence = self.cached_translations[tts_lang].get(sentence)
if not tx_sentence:
tx_sentence = self.translator.translate(sentence, lang,
self.lang)
self.cached_translations[lang][sentence] = tx_sentence
tx_sentence = self.translator.translate(sentence, tts_lang,
skill_lang)
self.cached_translations[tts_lang][sentence] = tx_sentence
self.cached_translations.store()
LOG.info(f"Got translated sentence: {tx_sentence}")
else:
tx_sentence = sentence
wav_file, phonemes = self._get_tts(tx_sentence, request, **kwargs)

# If this is the first response, populate translation and phonemes
if not responses.get(lang):
responses[lang] = {"sentence": tx_sentence,
"translated": tx_sentence != sentence,
"phonemes": phonemes,
"genders": list()}
responses.setdefault(tts_lang, {"sentence": tx_sentence,
"translated": tx_sentence != sentence,
"phonemes": phonemes,
"genders": list()})

# Append the generated audio from this request
if os.path.isfile(wav_file):
responses[lang][request["gender"]] = wav_file
responses[lang]["genders"].append(request["gender"])
responses[tts_lang][request["gender"]] = wav_file
responses[tts_lang]["genders"].append(request["gender"])
# If this is a remote request, encode audio in the response
if message.context.get("klat_data") or \
message.msg_type == "neon.get_tts":
responses[lang].setdefault("audio", {})
responses[lang]["audio"][request["gender"]] = \
responses[tts_lang].setdefault("audio", {})
responses[tts_lang]["audio"][request["gender"]] = \
encode_file_to_base64_string(wav_file)
LOG.debug(f"Got {tts_lang} {request['gender']} response")
else:
raise RuntimeError(f"No audio generated for request: {request}")
return responses
Expand Down
4 changes: 2 additions & 2 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ovos-audio~=0.0.1,>=0.0.2a12,<0.0.2a14
# TODO: ovos-audio 0.0.2a14 introduces a breaking change around `ident` handling
ovos-utils~=0.0.34
ovos-config~=0.0.7
ovos-utils==0.0.35
ovos-config~=0.0.10
phoneme-guesser~=0.1
ovos-plugin-manager~=0.0.22,>=0.0.24a5
neon-utils[network]~=1.6
Expand Down

0 comments on commit fc2697c

Please sign in to comment.