From 778c7e42be739c12fa04f29ed32efc48a1d43808 Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:49:02 -0800 Subject: [PATCH 01/16] Resample API input wav audio to ensure format matches listener config (#180) # Description Adds explicit handling of API input audio to set sample_rate, sample_width, and channels to match listener configuration Refactor to prevent creating an audio stream object not used for non-streaming STT # Issues https://github.com/NeonGeckoCom/neon-iris/issues/28 # Other Notes Co-authored-by: Daniel McKnight --- neon_speech/service.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/neon_speech/service.py b/neon_speech/service.py index 769842b..4f1f3d9 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -456,15 +456,21 @@ def _get_stt_from_file(self, wav_file: str, :return: (AudioData of object, extracted context, transcriptions) """ from neon_utils.file_utils import get_audio_file_stream - lang = lang or 'en-us' # TODO: read default from config - segment = AudioSegment.from_file(wav_file) + lang = lang or self.config.get('lang') + desired_sample_rate = self.config['listener'].get('sample_rate', 16000) + desired_sample_width = self.config['listener'].get('sample_width', 2) + segment = (AudioSegment.from_file(wav_file).set_channels(1) + .set_frame_rate(desired_sample_rate) + .set_sample_width(desired_sample_width)) + LOG.debug(f"Audio fr={segment.frame_rate},sw={segment.sample_width}," + f"fw={segment.frame_width},ch={segment.channels}") audio_data = AudioData(segment.raw_data, segment.frame_rate, segment.sample_width) - audio_stream = get_audio_file_stream(wav_file) if not self.api_stt: raise RuntimeError("api_stt not initialized." " is `listener['enable_stt_api'] set to False?") if hasattr(self.api_stt, 'stream_start'): + audio_stream = get_audio_file_stream(wav_file, desired_sample_rate) if self.lock.acquire(True, 30): LOG.info(f"Starting STT processing (lang={lang}): {wav_file}") self.api_stt.stream_start(lang) From 588721b692b05e2fa4a502348c37df4dc3db3b6e Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Thu, 9 Nov 2023 01:49:17 +0000 Subject: [PATCH 02/16] Increment Version to 4.2.1a1 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index de7eb74..6486924 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.0" +__version__ = "4.2.1a1" From b8917d801af5df355e2c0f405160c16f99a637b8 Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Thu, 9 Nov 2023 17:40:38 -0800 Subject: [PATCH 03/16] Add timing metrics for audio input to handler in speech service (#181) # Description Handles `client_sent` timestamp and calculates how long it took for the message to be handled in this module # Issues # Other Notes https://github.com/NeonGeckoCom/neon_audio/pull/154 Co-authored-by: Daniel McKnight --- neon_speech/service.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/neon_speech/service.py b/neon_speech/service.py index 4f1f3d9..a5c35d5 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -375,6 +375,9 @@ def build_context(msg: Message): 'transcribed': time()}} return ctx + received_time = time() + sent_time = message.context.get("timing", {}).get("client_sent", + received_time) ident = message.context.get("ident") or "neon.audio_input.response" LOG.info(f"Handling audio input: {ident}") if message.data.get("audio_data"): @@ -385,10 +388,13 @@ def build_context(msg: Message): lang = message.data.get("lang") try: with self._stopwatch: + # _=transformed audio_data _, parser_data, transcriptions = \ self._get_stt_from_file(wav_file_path, lang) message.context["audio_parser_data"] = parser_data context = build_context(message) + if received_time != sent_time: + context['timing']['mq_from_client'] = received_time - sent_time context['timing']['get_stt'] = self._stopwatch.time data = { "utterances": transcriptions, From b433e95ed859acfdaecf2ccc9a53c57b2155ca9c Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Fri, 10 Nov 2023 01:40:55 +0000 Subject: [PATCH 04/16] Increment Version to 4.2.1a2 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index 6486924..c533f4d 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a1" +__version__ = "4.2.1a2" From 9f913d00863bd6d6fa7b53909a644e249409e217 Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Mon, 13 Nov 2023 17:18:46 -0800 Subject: [PATCH 05/16] Improved timing context handling with unit tests (#182) # Description Allows for `timing` context sent from a connected client Adds unit tests to validate timing context handling # Issues Continues #181 # Other Notes --------- Co-authored-by: Daniel McKnight --- neon_speech/service.py | 96 +++++++++++++++++---------- tests/api_method_tests.py | 134 +++++++++++++++++++++++++------------- 2 files changed, 152 insertions(+), 78 deletions(-) diff --git a/neon_speech/service.py b/neon_speech/service.py index a5c35d5..e6feb36 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -27,6 +27,8 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os +from typing import Dict + import ovos_dinkum_listener.plugins from tempfile import mkstemp @@ -80,8 +82,6 @@ def on_started(): class NeonSpeechClient(OVOSDinkumVoiceService): - _stopwatch = Stopwatch("get_stt") - def __init__(self, ready_hook=on_ready, error_hook=on_error, stopping_hook=on_stopping, alive_hook=on_alive, started_hook=on_started, watchdog=lambda: None, @@ -327,6 +327,7 @@ def handle_get_stt(self, message: Message): Emits a response to the sender with stt data or error data :param message: Message associated with request """ + received_time = time() if message.data.get("audio_data"): wav_file_path = self._write_encoded_file( message.data.pop("audio_data")) @@ -334,24 +335,38 @@ def handle_get_stt(self, message: Message): wav_file_path = message.data.get("audio_file") lang = message.data.get("lang") ident = message.context.get("ident") or "neon.get_stt.response" + + message.context.setdefault("timing", dict()) LOG.info(f"Handling STT request: {ident}") if not wav_file_path: + message.context['timing']['response_sent'] = time() self.bus.emit(message.reply( ident, data={"error": f"audio_file not specified!"})) return if not os.path.isfile(wav_file_path): + message.context['timing']['response_sent'] = time() self.bus.emit(message.reply( ident, data={"error": f"{wav_file_path} Not found!"})) try: + _, parser_data, transcriptions = \ self._get_stt_from_file(wav_file_path, lang) + timing = parser_data.pop('timing') + message.context["timing"] = {**message.context["timing"], **timing} + sent_time = message.context["timing"].get("client_sent", + received_time) + if received_time != sent_time: + message.context['timing']['client_to_core'] = \ + received_time - sent_time + message.context['timing']['response_sent'] = time() self.bus.emit(message.reply(ident, data={"parser_data": parser_data, "transcripts": transcriptions})) except Exception as e: LOG.error(e) + message.context['timing']['response_sent'] = time() self.bus.emit(message.reply(ident, data={"error": repr(e)})) def handle_audio_input(self, message): @@ -370,14 +385,18 @@ def build_context(msg: Message): 'username': self._default_user["user"]["username"] or "local", 'user_profiles': [self._default_user.content]} - ctx = {**defaults, **ctx, 'destination': ['skills'], - 'timing': {'start': msg.data.get('time'), - 'transcribed': time()}} + ctx = {**defaults, **ctx, 'destination': ['skills']} + ctx['timing'] = {**ctx.get('timing', {}), + **{'start': msg.data.get('time'), + 'transcribed': time()}} return ctx received_time = time() sent_time = message.context.get("timing", {}).get("client_sent", received_time) + if received_time != sent_time: + message.context['timing']['client_to_core'] = \ + received_time - sent_time ident = message.context.get("ident") or "neon.audio_input.response" LOG.info(f"Handling audio input: {ident}") if message.data.get("audio_data"): @@ -387,21 +406,23 @@ def build_context(msg: Message): wav_file_path = message.data.get("audio_file") lang = message.data.get("lang") try: - with self._stopwatch: - # _=transformed audio_data - _, parser_data, transcriptions = \ - self._get_stt_from_file(wav_file_path, lang) + # _=transformed audio_data + _, parser_data, transcriptions = \ + self._get_stt_from_file(wav_file_path, lang) + timing = parser_data.pop('timing') message.context["audio_parser_data"] = parser_data + message.context.setdefault('timing', dict()) + message.context['timing'] = {**timing, **message.context['timing']} context = build_context(message) - if received_time != sent_time: - context['timing']['mq_from_client'] = received_time - sent_time - context['timing']['get_stt'] = self._stopwatch.time data = { "utterances": transcriptions, "lang": message.data.get("lang", "en-us") } + # Send a new message to the skills module with proper routing ctx handled = self._emit_utterance_to_skills(Message( 'recognizer_loop:utterance', data, context)) + + # Reply to original message with transcription/audio parser data self.bus.emit(message.reply(ident, data={"parser_data": parser_data, "transcripts": transcriptions, @@ -429,7 +450,7 @@ def handle_offline(self, _): Handle notification to operate in offline mode """ LOG.info("Offline mode selected, Reloading STT Plugin") - config = dict(self.config) + config: Dict[str, dict] = dict(self.config) if config['stt'].get('offline_module'): config['stt']['module'] = config['stt'].get('offline_module') self.voice_loop.stt = STTFactory.create(config) @@ -462,6 +483,7 @@ def _get_stt_from_file(self, wav_file: str, :return: (AudioData of object, extracted context, transcriptions) """ from neon_utils.file_utils import get_audio_file_stream + _stopwatch = Stopwatch() lang = lang or self.config.get('lang') desired_sample_rate = self.config['listener'].get('sample_rate', 16000) desired_sample_width = self.config['listener'].get('sample_width', 2) @@ -475,28 +497,34 @@ def _get_stt_from_file(self, wav_file: str, if not self.api_stt: raise RuntimeError("api_stt not initialized." " is `listener['enable_stt_api'] set to False?") - if hasattr(self.api_stt, 'stream_start'): - audio_stream = get_audio_file_stream(wav_file, desired_sample_rate) - if self.lock.acquire(True, 30): - LOG.info(f"Starting STT processing (lang={lang}): {wav_file}") - self.api_stt.stream_start(lang) - while True: - try: - data = audio_stream.read(1024) - self.api_stt.stream_data(data) - except EOFError: - break - transcriptions = self.api_stt.stream_stop() - self.lock.release() + with _stopwatch: + if hasattr(self.api_stt, 'stream_start'): + audio_stream = get_audio_file_stream(wav_file, desired_sample_rate) + if self.lock.acquire(True, 30): + LOG.info(f"Starting STT processing (lang={lang}): {wav_file}") + self.api_stt.stream_start(lang) + while True: + try: + data = audio_stream.read(1024) + self.api_stt.stream_data(data) + except EOFError: + break + transcriptions = self.api_stt.stream_stop() + self.lock.release() + else: + LOG.error(f"Timed out acquiring lock, not processing: {wav_file}") + transcriptions = [] else: - LOG.error(f"Timed out acquiring lock, not processing: {wav_file}") - transcriptions = [] - else: - transcriptions = self.api_stt.execute(audio_data, lang) - if isinstance(transcriptions, str): - LOG.warning("Transcriptions is a str, no alternatives provided") - transcriptions = [transcriptions] - audio, audio_context = self.transformers.transform(audio_data) + transcriptions = self.api_stt.execute(audio_data, lang) + if isinstance(transcriptions, str): + LOG.warning("Transcriptions is a str, no alternatives provided") + transcriptions = [transcriptions] + + get_stt = float(_stopwatch.time) + with _stopwatch: + audio, audio_context = self.transformers.transform(audio_data) + audio_context["timing"] = {"get_stt": get_stt, + "transform_audio": _stopwatch.time} LOG.info(f"Transcribed: {transcriptions}") return audio, audio_context, transcriptions diff --git a/tests/api_method_tests.py b/tests/api_method_tests.py index c1c4d07..2354b52 100644 --- a/tests/api_method_tests.py +++ b/tests/api_method_tests.py @@ -69,18 +69,18 @@ def setUpClass(cls) -> None: test_config["listener"]["VAD"]["module"] = "dummy" assert test_config["stt"]["module"] == "deepspeech_stream_local" + ready_event = Event() + + def _ready(): + ready_event.set() + cls.speech_service = NeonSpeechClient(speech_config=test_config, - daemonic=False, bus=cls.bus) + daemonic=False, bus=cls.bus, + ready_hook=_ready) assert cls.speech_service.config["stt"]["module"] == "deepspeech_stream_local" cls.speech_service.start() - ready = False - timeout = time() + 120 - while not ready and time() < timeout: - message = cls.bus.wait_for_response( - Message("mycroft.voice.is_ready")) - if message: - ready = message.data.get("status") - if not ready: + + if not ready_event.wait(120): raise TimeoutError("Speech module not ready after 120 seconds") from ovos_plugin_manager.templates import STT assert isinstance(cls.speech_service.voice_loop.stt, STT) @@ -105,6 +105,8 @@ def test_get_stt_no_file(self): {}, context), context["ident"]) self.assertEqual(stt_resp.context, context) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("error"), str) self.assertEqual(stt_resp.data["error"], "audio_file not specified!") @@ -114,8 +116,11 @@ def test_get_stt_invalid_file_path(self): "user": "TestRunner"} stt_resp = self.bus.wait_for_response( Message("neon.get_stt", {"audio_file": "~/invalid_file.wav"}, - context), context["ident"]) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"]) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("error"), str) def test_get_stt_invalid_file_type(self): @@ -125,24 +130,36 @@ def test_get_stt_invalid_file_type(self): stt_resp = self.bus.wait_for_response( Message("neon.get_stt", {"audio_file": os.path.join(AUDIO_FILE_PATH, "test.txt")}, - context), context["ident"]) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"]) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("error"), str) def test_get_stt_valid_file(self): context = {"client": "tester", "ident": "12345", - "user": "TestRunner"} + "user": "TestRunner", + "timing": {"client_sent": time()}} stt_resp = self.bus.wait_for_response(Message( "neon.get_stt", {"audio_file": os.path.join(AUDIO_FILE_PATH, "stop.wav")}, - context), context["ident"], 60.0) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"], 60.0) + for key in context: + if key != 'timing': + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("parser_data"), dict, stt_resp.serialize()) self.assertIsInstance(stt_resp.data.get("transcripts"), list, stt_resp.serialize()) self.assertIn("stop", stt_resp.data.get("transcripts")) + self.assertEqual(stt_resp.context['timing']['client_sent'], + context['timing']['client_sent'], stt_resp.context) + self.assertIsInstance(stt_resp.context['timing']['client_to_core'], + float, stt_resp.context) def test_get_stt_valid_contents(self): context = {"client": "tester", @@ -152,8 +169,11 @@ def test_get_stt_valid_contents(self): "stop.wav")) stt_resp = self.bus.wait_for_response(Message( "neon.get_stt", {"audio_data": audio_data}, - context), context["ident"], 60.0) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"], 60.0) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("parser_data"), dict, stt_resp.serialize()) self.assertIsInstance(stt_resp.data.get("transcripts"), list, @@ -166,29 +186,38 @@ def test_audio_input_valid(self): context = {"client": "tester", "ident": "11111", "user": "TestRunner", - "extra_data": "something"} + "extra_data": "something", + "timing": {"client_sent": time()}} audio_data = encode_file_to_base64_string(os.path.join(AUDIO_FILE_PATH, "stop.wav")) stt_resp = self.bus.wait_for_response(Message( "neon.audio_input", {"audio_data": audio_data}, - context), context["ident"], 60.0) + dict(context)), context["ident"], 60.0) self.assertIsInstance(stt_resp, Message) for key in context: self.assertIn(key, stt_resp.context) - self.assertEqual(context[key], stt_resp.context[key]) + if key != "timing": + self.assertEqual(context[key], stt_resp.context[key]) self.assertIsInstance(stt_resp.data.get("skills_recv"), bool, stt_resp.serialize()) + self.assertIsInstance(stt_resp.context['timing']['client_to_core'], + float, stt_resp.context) handle_utterance.assert_called_once() message = handle_utterance.call_args[0][0] self.assertIsInstance(message, Message) for key in context: self.assertIn(key, message.context) - self.assertEqual(context[key], message.context[key]) + if key != "timing": + self.assertEqual(context[key], message.context[key]) self.assertIsInstance(message.data["utterances"], list, message.data) self.assertIn("stop", message.data["utterances"], message.data.get("utterances")) self.assertIsInstance(message.context["timing"], dict) + self.assertIsInstance(message.context['timing']['client_to_core'], + float, message.context) + self.assertIsInstance(message.context['timing']['transcribed'], float, + message.context) self.assertEqual(message.context["destination"], ["skills"]) def test_wake_words_state(self): @@ -256,19 +285,19 @@ def setUpClass(cls) -> None: test_config["listener"]["VAD"]["module"] = "dummy" assert test_config["stt"]["module"] == "neon-stt-plugin-nemo" + ready_event = Event() + + def _ready(): + ready_event.set() + cls.speech_service = NeonSpeechClient(speech_config=test_config, - daemonic=False, bus=cls.bus) + daemonic=False, bus=cls.bus, + ready_hook=_ready) assert cls.speech_service.config["stt"]["module"] == \ "neon-stt-plugin-nemo" cls.speech_service.start() - ready = False - timeout = time() + 120 - while not ready and time() < timeout: - message = cls.bus.wait_for_response( - Message("mycroft.voice.is_ready")) - if message: - ready = message.data.get("status") - if not ready: + + if not ready_event.wait(120): raise TimeoutError("Speech module not ready after 120 seconds") from ovos_plugin_manager.templates import STT assert isinstance(cls.speech_service.voice_loop.stt, STT) @@ -290,9 +319,12 @@ def test_get_stt_no_file(self): "ident": "123", "user": "TestRunner"} stt_resp = self.bus.wait_for_response(Message("neon.get_stt", - {}, context), + {}, dict(context)), context["ident"]) - self.assertEqual(stt_resp.context, context) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("error"), str) self.assertEqual(stt_resp.data["error"], "audio_file not specified!") @@ -302,8 +334,11 @@ def test_get_stt_invalid_file_path(self): "user": "TestRunner"} stt_resp = self.bus.wait_for_response( Message("neon.get_stt", {"audio_file": "~/invalid_file.wav"}, - context), context["ident"]) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"]) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("error"), str) def test_get_stt_invalid_file_type(self): @@ -313,8 +348,11 @@ def test_get_stt_invalid_file_type(self): stt_resp = self.bus.wait_for_response( Message("neon.get_stt", {"audio_file": os.path.join(AUDIO_FILE_PATH, "test.txt")}, - context), context["ident"]) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"]) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("error"), str) def test_get_stt_valid_file(self): @@ -324,8 +362,11 @@ def test_get_stt_valid_file(self): stt_resp = self.bus.wait_for_response(Message( "neon.get_stt", {"audio_file": os.path.join(AUDIO_FILE_PATH, "stop.wav")}, - context), context["ident"], 60.0) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"], 60.0) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("parser_data"), dict, stt_resp.serialize()) self.assertIsInstance(stt_resp.data.get("transcripts"), list, @@ -340,8 +381,11 @@ def test_get_stt_valid_contents(self): "stop.wav")) stt_resp = self.bus.wait_for_response(Message( "neon.get_stt", {"audio_data": audio_data}, - context), context["ident"], 60.0) - self.assertEqual(stt_resp.context, context) + dict(context)), context["ident"], 60.0) + for key in context: + self.assertEqual(stt_resp.context[key], context[key]) + self.assertIsInstance(stt_resp.context['timing']['response_sent'], + float) self.assertIsInstance(stt_resp.data.get("parser_data"), dict, stt_resp.serialize()) self.assertIsInstance(stt_resp.data.get("transcripts"), list, @@ -359,11 +403,12 @@ def test_audio_input_valid(self): "stop.wav")) stt_resp = self.bus.wait_for_response(Message( "neon.audio_input", {"audio_data": audio_data}, - context), context["ident"], 60.0) + dict(context)), context["ident"], 60.0) self.assertIsInstance(stt_resp, Message) for key in context: self.assertIn(key, stt_resp.context) - self.assertEqual(context[key], stt_resp.context[key]) + if key != "timing": + self.assertEqual(context[key], stt_resp.context[key]) self.assertIsInstance(stt_resp.data.get("skills_recv"), bool, stt_resp.serialize()) @@ -372,7 +417,8 @@ def test_audio_input_valid(self): self.assertIsInstance(message, Message) for key in context: self.assertIn(key, message.context) - self.assertEqual(context[key], message.context[key]) + if key != "timing": + self.assertEqual(context[key], message.context[key]) self.assertIsInstance(message.data["utterances"], list, message.data) self.assertIn("stop", message.data["utterances"], message.data.get("utterances")) From b1757265da3d0b46d0e8e76e232367cf0d2d5dbc Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Tue, 14 Nov 2023 01:19:01 +0000 Subject: [PATCH 06/16] Increment Version to 4.2.1a3 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index c533f4d..5ac0ed7 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a2" +__version__ = "4.2.1a3" From 5eef8aa2fe9894bec81357c00b511cdf0588c14f Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:13:00 -0800 Subject: [PATCH 07/16] Add timing metrics (#183) # Description Override AudioTransformersService to add timing context and metrics reporting Override listener handlers to add timing context and metrics reporting Adds save_audio timing context and reporting Adds save_ww reporting # Issues # Other Notes --------- Co-authored-by: Daniel McKnight --- neon_speech/__init__.py | 3 +++ neon_speech/service.py | 33 ++++++++++++++++++++++++ neon_speech/transformers.py | 47 +++++++++++++++++++++++++++++++++++ requirements/requirements.txt | 2 +- 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 neon_speech/transformers.py diff --git a/neon_speech/__init__.py b/neon_speech/__init__.py index 718d1b0..4af0cae 100644 --- a/neon_speech/__init__.py +++ b/neon_speech/__init__.py @@ -25,3 +25,6 @@ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Import to ensure patched class is applied +from neon_speech.transformers import NeonAudioTransformerService diff --git a/neon_speech/service.py b/neon_speech/service.py index e6feb36..1e9f014 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -112,6 +112,8 @@ def __init__(self, ready_hook=on_ready, error_hook=on_error, watchdog=watchdog) self.daemon = daemonic self.config.bus = self.bus + self._stt_stopwatch = Stopwatch("get_stt", allow_reporting=True, + bus=self.bus) from neon_utils.signal_utils import init_signal_handlers, \ init_signal_bus init_signal_bus(self.bus) @@ -133,6 +135,37 @@ def __init__(self, ready_hook=on_ready, error_hook=on_error, LOG.info("Skipping api_stt init") self.api_stt = None + def _record_begin(self): + self._stt_stopwatch.start() + OVOSDinkumVoiceService._record_begin(self) + + def _stt_text(self, text: str, stt_context: dict): + self._stt_stopwatch.stop() + stt_context.setdefault("timing", dict()) + stt_context["timing"]["get_stt"] = self._stt_stopwatch.time + + # This is where the first Message of the interaction is created + OVOSDinkumVoiceService._stt_text(self, text, stt_context) + self._stt_stopwatch.report() + + def _save_stt(self, audio_bytes, stt_meta, save_path=None): + stopwatch = Stopwatch("save_audio", True, self.bus) + with stopwatch: + path = OVOSDinkumVoiceService._save_stt(self, audio_bytes, stt_meta, + save_path) + stt_meta.setdefault('timing', dict()) + stt_meta['timing']['save_audio'] = stopwatch.time + return path + + def _save_ww(self, audio_bytes, ww_meta, save_path=None): + stopwatch = Stopwatch("save_ww", True, self.bus) + with stopwatch: + path = OVOSDinkumVoiceService._save_ww(self, audio_bytes, ww_meta, + save_path) + ww_meta.setdefault('timing', dict()) + ww_meta['timing']['save_ww'] = stopwatch.time + return path + def _validate_message_context(self, message: Message, native_sources=None): if message.context.get('destination') and \ "audio" not in message.context['destination']: diff --git a/neon_speech/transformers.py b/neon_speech/transformers.py new file mode 100644 index 0000000..dde3179 --- /dev/null +++ b/neon_speech/transformers.py @@ -0,0 +1,47 @@ +# NEON AI (TM) SOFTWARE, Software Development Kit & Application Framework +# All trademark and other rights reserved by their respective owners +# Copyright 2008-2022 Neongecko.com Inc. +# Contributors: Daniel McKnight, Guy Daniels, Elon Gasper, Richard Leeds, +# Regina Bloomstine, Casimiro Ferreira, Andrii Pernatii, Kirill Hrymailo +# BSD-3 License +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import ovos_dinkum_listener.transformers +from neon_utils.metrics_utils import Stopwatch +from ovos_dinkum_listener.transformers import AudioTransformersService + + +class NeonAudioTransformerService(AudioTransformersService): + """ + Overrides the default AudioTransformersService to add timing metrics + """ + + def transform(self, chunk: bytes) -> (bytes, dict): + stopwatch = Stopwatch("transform_audio", True, self.bus) + with stopwatch: + chunk, context = AudioTransformersService.transform(self, chunk) + context.setdefault("timing", dict()) + context['timing']['transform_audio'] = stopwatch.time + return chunk, context + + +ovos_dinkum_listener.transformers.AudioTransformersService = NeonAudioTransformerService diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c9f6b97..1a2a78e 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -4,7 +4,7 @@ ovos-utils~=0.0.30 ovos-plugin-manager~=0.0.23 click~=8.0 click-default-group~=1.2 -neon-utils[network,audio]~=1.6 +neon-utils[network,audio]~=1.6,>=1.7.1a4 ovos-config~=0.0.7 ovos-vad-plugin-webrtcvad~=0.0.1 From 11c9f11a4760c97e92faf80cdd2a76a428c6aa9b Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Wed, 22 Nov 2023 00:13:18 +0000 Subject: [PATCH 08/16] Increment Version to 4.2.1a4 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index 5ac0ed7..0acd7a8 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a3" +__version__ = "4.2.1a4" From 4c2f2e71d09e44c2beaf09714b48ec292d6de468 Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:26:38 -0800 Subject: [PATCH 09/16] Update global config on local user STT language change (#184) # Description Handle local user language changes as global changes for STT handling # Issues # Other Notes Co-authored-by: Daniel McKnight --- neon_speech/service.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/neon_speech/service.py b/neon_speech/service.py index 1e9f014..9668532 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -328,10 +328,18 @@ def handle_profile_update(self, message): :param message: Message associated with profile update """ updated_profile = message.data.get("profile") - if updated_profile["user"]["username"] == \ + if updated_profile["user"]["username"] != \ self._default_user["user"]["username"]: - apply_local_user_profile_updates(updated_profile, - self._default_user) + LOG.info(f"Ignoring profile update for " + f"{updated_profile['user']['username']}") + return + apply_local_user_profile_updates(updated_profile, + self._default_user) + if updated_profile.get("speech", {}).get("stt_language"): + new_stt_lang = updated_profile["speech"]["stt_language"] + if new_stt_lang != self.config['lang']: + from neon_speech.utils import patch_config + patch_config({"lang": new_stt_lang}) def handle_wake_words_state(self, message): """ From 1154859a07b8bfc9a699fad8da0abbef8cea699e Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Wed, 22 Nov 2023 00:26:57 +0000 Subject: [PATCH 10/16] Increment Version to 4.2.1a5 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index 0acd7a8..7149aed 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a4" +__version__ = "4.2.1a5" From 2e23c04b75c02de4b188cf8d6981902cec820354 Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Tue, 28 Nov 2023 16:26:53 -0800 Subject: [PATCH 11/16] Override ovos.language.stt handler for server/API usage (#185) # Description Adds support for k8s deployments and other instances not running a voice loop to query supported STT languages # Issues # Other Notes Co-authored-by: Daniel McKnight --- neon_speech/service.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/neon_speech/service.py b/neon_speech/service.py index 9668532..92199a1 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -221,6 +221,16 @@ def register_event_handlers(self): self.bus.on("neon.enable_wake_word", self.handle_enable_wake_word) self.bus.on("neon.disable_wake_word", self.handle_disable_wake_word) + def _handle_get_languages_stt(self, message): + if self.config.get('listener', {}).get('enable_voice_loop', True): + return OVOSDinkumVoiceService._handle_get_languages_stt(self, + message) + # For server use, get the API STT langs + stt_langs = self.api_stt.available_languages or \ + [self.config.get('lang') or 'en-us'] + LOG.debug(f"Got stt_langs: {stt_langs}") + self.bus.emit(message.response({'langs': list(stt_langs)})) + def handle_disable_wake_word(self, message: Message): """ Disable a wake word. If the requested wake word is the only one enabled, From 77305d7830ab3337b01bc6e8683135967bf6f5df Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Wed, 29 Nov 2023 00:27:08 +0000 Subject: [PATCH 12/16] Increment Version to 4.2.1a6 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index 7149aed..1cc93ff 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a5" +__version__ = "4.2.1a6" From bd09c429340e5f1edc82f1a377215c276bf8ae54 Mon Sep 17 00:00:00 2001 From: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com> Date: Wed, 13 Dec 2023 13:26:57 -0800 Subject: [PATCH 13/16] Update neon-utils dependency to stable release (#186) # Description # Issues # Other Notes Co-authored-by: Daniel McKnight --- requirements/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 1a2a78e..b10137e 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -4,7 +4,7 @@ ovos-utils~=0.0.30 ovos-plugin-manager~=0.0.23 click~=8.0 click-default-group~=1.2 -neon-utils[network,audio]~=1.6,>=1.7.1a4 +neon-utils[network,audio]~=1.7 ovos-config~=0.0.7 ovos-vad-plugin-webrtcvad~=0.0.1 From 22ddf3a0ddb8c68d4da55f18d06216d8f8cc9804 Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Wed, 13 Dec 2023 21:27:13 +0000 Subject: [PATCH 14/16] Increment Version to 4.2.1a7 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index 1cc93ff..c88fbd3 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a6" +__version__ = "4.2.1a7" From 8e4cb3c2931c569e717e81c96db44724ba660eaa Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Mon, 18 Dec 2023 22:44:52 +0000 Subject: [PATCH 15/16] Increment Version to 4.3.0 --- version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.py b/version.py index c88fbd3..1102bcb 100644 --- a/version.py +++ b/version.py @@ -26,4 +26,4 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "4.2.1a7" +__version__ = "4.3.0" From b9c3c64b9eff50bf76f14389b9fc295685ef65f6 Mon Sep 17 00:00:00 2001 From: NeonDaniel Date: Mon, 18 Dec 2023 22:45:25 +0000 Subject: [PATCH 16/16] Update Changelog --- CHANGELOG.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4e306b..73b4876 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,60 +1,60 @@ # Changelog -## [4.2.0](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.0) (2023-10-27) +## [4.2.1a7](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a7) (2023-12-13) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.1a6...4.2.0) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.1a6...4.2.1a7) -**Fixed bugs:** +**Merged pull requests:** -- \[BUG\] Docker `start_listening` resource missing [\#170](https://github.com/NeonGeckoCom/neon_speech/issues/170) +- Update neon-utils dependency to stable release [\#186](https://github.com/NeonGeckoCom/neon_speech/pull/186) ([NeonDaniel](https://github.com/NeonDaniel)) -## [4.1.1a6](https://github.com/NeonGeckoCom/neon_speech/tree/4.1.1a6) (2023-10-26) +## [4.2.1a6](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a6) (2023-11-29) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.1a5...4.1.1a6) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.1a5...4.2.1a6) **Merged pull requests:** -- OVOS Dinkum Listener Backwards Compat [\#178](https://github.com/NeonGeckoCom/neon_speech/pull/178) ([NeonDaniel](https://github.com/NeonDaniel)) +- Override ovos.language.stt handler for server/API usage [\#185](https://github.com/NeonGeckoCom/neon_speech/pull/185) ([NeonDaniel](https://github.com/NeonDaniel)) -## [4.1.1a5](https://github.com/NeonGeckoCom/neon_speech/tree/4.1.1a5) (2023-10-26) +## [4.2.1a5](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a5) (2023-11-22) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.1a4...4.1.1a5) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.1a4...4.2.1a5) **Merged pull requests:** -- Stable dependencies for release [\#177](https://github.com/NeonGeckoCom/neon_speech/pull/177) ([NeonDaniel](https://github.com/NeonDaniel)) +- Update global config on local user STT language change [\#184](https://github.com/NeonGeckoCom/neon_speech/pull/184) ([NeonDaniel](https://github.com/NeonDaniel)) -## [4.1.1a4](https://github.com/NeonGeckoCom/neon_speech/tree/4.1.1a4) (2023-10-13) +## [4.2.1a4](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a4) (2023-11-22) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.1a3...4.1.1a4) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.1a3...4.2.1a4) **Merged pull requests:** -- Update Dinkum Listener dependency [\#176](https://github.com/NeonGeckoCom/neon_speech/pull/176) ([NeonDaniel](https://github.com/NeonDaniel)) +- Add timing metrics [\#183](https://github.com/NeonGeckoCom/neon_speech/pull/183) ([NeonDaniel](https://github.com/NeonDaniel)) -## [4.1.1a3](https://github.com/NeonGeckoCom/neon_speech/tree/4.1.1a3) (2023-10-03) +## [4.2.1a3](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a3) (2023-11-14) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.1a2...4.1.1a3) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.1a2...4.2.1a3) **Merged pull requests:** -- Add timing metrics for minerva testing [\#175](https://github.com/NeonGeckoCom/neon_speech/pull/175) ([NeonDaniel](https://github.com/NeonDaniel)) +- Improved timing context handling with unit tests [\#182](https://github.com/NeonGeckoCom/neon_speech/pull/182) ([NeonDaniel](https://github.com/NeonDaniel)) -## [4.1.1a2](https://github.com/NeonGeckoCom/neon_speech/tree/4.1.1a2) (2023-07-28) +## [4.2.1a2](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a2) (2023-11-10) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.1a1...4.1.1a2) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.1a1...4.2.1a2) **Merged pull requests:** -- Kubernetes/No-audio server compat. [\#174](https://github.com/NeonGeckoCom/neon_speech/pull/174) ([NeonDaniel](https://github.com/NeonDaniel)) +- Add timing metrics for audio input to handler in speech service [\#181](https://github.com/NeonGeckoCom/neon_speech/pull/181) ([NeonDaniel](https://github.com/NeonDaniel)) -## [4.1.1a1](https://github.com/NeonGeckoCom/neon_speech/tree/4.1.1a1) (2023-07-27) +## [4.2.1a1](https://github.com/NeonGeckoCom/neon_speech/tree/4.2.1a1) (2023-11-09) -[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.1.0...4.1.1a1) +[Full Changelog](https://github.com/NeonGeckoCom/neon_speech/compare/4.2.0...4.2.1a1) **Merged pull requests:** -- Update container config handling and resolve logged warnings [\#173](https://github.com/NeonGeckoCom/neon_speech/pull/173) ([NeonDaniel](https://github.com/NeonDaniel)) +- Resample API input wav audio to ensure format matches listener config [\#180](https://github.com/NeonGeckoCom/neon_speech/pull/180) ([NeonDaniel](https://github.com/NeonDaniel))