From 27d8a12006b120bf6025360d5c789399eaf8839f Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Mon, 2 Oct 2023 18:50:36 -0700 Subject: [PATCH 1/5] Add `get_stt` timing metric for audio input --- neon_speech/service.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/neon_speech/service.py b/neon_speech/service.py index 79f13dc..621ecee 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -37,6 +37,7 @@ from neon_utils.file_utils import decode_base64_string_to_file from ovos_utils.log import LOG from neon_utils.configuration_utils import get_neon_user_config +from neon_utils.metrics_utils import Stopwatch from neon_utils.user_utils import apply_local_user_profile_updates from ovos_bus_client import Message from ovos_config.config import update_mycroft_config @@ -79,6 +80,8 @@ def on_started(): class NeonSpeechClient(OVOSDinkumVoiceService): + _stopwatch = Stopwatch("get_stt") + def __init__(self, ready_hook=on_ready, error_hook=on_error, stopping_hook=on_stopping, alive_hook=on_alive, started_hook=on_started, watchdog=lambda: None, @@ -372,8 +375,11 @@ def build_context(msg: Message): wav_file_path = message.data.get("audio_file") lang = message.data.get("lang") try: - _, parser_data, transcriptions = \ - self._get_stt_from_file(wav_file_path, lang) + with self._stopwatch: + _, parser_data, transcriptions = \ + self._get_stt_from_file(wav_file_path, lang) + message.context.setdefault('timing', dict()) + message.context['timing']['get_stt'] = self._stopwatch.time() message.context["audio_parser_data"] = parser_data context = build_context(message) data = { From 2965df6e969a1f4bf17af18d824f0ef76313044e Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Mon, 2 Oct 2023 19:05:47 -0700 Subject: [PATCH 2/5] Fix typo in added timing data handling --- neon_speech/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neon_speech/service.py b/neon_speech/service.py index 621ecee..3779af5 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -379,7 +379,7 @@ def build_context(msg: Message): _, parser_data, transcriptions = \ self._get_stt_from_file(wav_file_path, lang) message.context.setdefault('timing', dict()) - message.context['timing']['get_stt'] = self._stopwatch.time() + message.context['timing']['get_stt'] = self._stopwatch.time message.context["audio_parser_data"] = parser_data context = build_context(message) data = { From 1476b19e0539e6686dd0180ef109b38cf12fb4b0 Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Mon, 2 Oct 2023 19:33:56 -0700 Subject: [PATCH 3/5] Fix `get_stt` timing context handling --- neon_speech/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/neon_speech/service.py b/neon_speech/service.py index 3779af5..44ce694 100644 --- a/neon_speech/service.py +++ b/neon_speech/service.py @@ -378,10 +378,9 @@ def build_context(msg: Message): with self._stopwatch: _, parser_data, transcriptions = \ self._get_stt_from_file(wav_file_path, lang) - message.context.setdefault('timing', dict()) - message.context['timing']['get_stt'] = self._stopwatch.time message.context["audio_parser_data"] = parser_data context = build_context(message) + context['timing']['get_stt'] = self._stopwatch.time data = { "utterances": transcriptions, "lang": message.data.get("lang", "en-us") From f4ab41d2253b37d688139ba07c54ef91266d6885 Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Tue, 3 Oct 2023 13:25:06 -0700 Subject: [PATCH 4/5] Whitelist audioread license (MIT) https://github.com/beetbox/audioread/blob/main/LICENSE --- .github/workflows/license_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/license_tests.yml b/.github/workflows/license_tests.yml index 7d0c4f6..c57fbca 100644 --- a/.github/workflows/license_tests.yml +++ b/.github/workflows/license_tests.yml @@ -8,3 +8,5 @@ on: jobs: license_tests: uses: neongeckocom/.github/.github/workflows/license_tests.yml@master + with: + packages-exclude: '^(precise-runner|fann2|tqdm|bs4|ovos-phal-plugin|ovos-skill|neon-core|nvidia|neon-phal-plugin|bitstruct|audioread).*' From 448908b677011da7448dcfe5b98f118e7d007c3b Mon Sep 17 00:00:00 2001 From: Daniel McKnight Date: Tue, 3 Oct 2023 13:28:07 -0700 Subject: [PATCH 5/5] Patch dependency for https://github.com/microsoft/onnxruntime/issues/17631 --- requirements/docker.txt | 2 ++ requirements/test_requirements.txt | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements/docker.txt b/requirements/docker.txt index 37f32e8..bb0aae8 100644 --- a/requirements/docker.txt +++ b/requirements/docker.txt @@ -1,5 +1,7 @@ ovos-stt-plugin-vosk~=0.1 neon-stt-plugin-nemo~=0.0.2 +onnxruntime!=1.16.0 # TODO: Patching https://github.com/microsoft/onnxruntime/issues/17631 + # Load alternative WW plugins so they are available ovos-ww-plugin-pocketsphinx~=0.1 ovos-ww-plugin-precise-lite~=0.1 diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt index ea60ae7..94bb8f2 100644 --- a/requirements/test_requirements.txt +++ b/requirements/test_requirements.txt @@ -5,4 +5,5 @@ ovos-stt-plugin-server~=0.0.3 pytest mock~=4.0 pydub~=0.23 -SpeechRecognition~=3.8 \ No newline at end of file +SpeechRecognition~=3.8 +onnxruntime!=1.16.0 # TODO: Patching https://github.com/microsoft/onnxruntime/issues/17631