From 2c54f9cf4780aa81a456b54adfa8c1bf187ef8c0 Mon Sep 17 00:00:00 2001 From: Jeesang Kim Date: Fri, 4 Aug 2023 17:18:49 +0900 Subject: [PATCH] python: move up the inner methods for readability --- samples/python/console/intent_sample.py | 38 +++--- .../speech_language_detection_sample.py | 44 +++---- samples/python/console/speech_sample.py | 119 +++++++++--------- .../python/console/transcription_sample.py | 12 +- samples/python/console/translation_sample.py | 100 +++++++-------- 5 files changed, 157 insertions(+), 156 deletions(-) diff --git a/samples/python/console/intent_sample.py b/samples/python/console/intent_sample.py index f820f6298..42ce56194 100644 --- a/samples/python/console/intent_sample.py +++ b/samples/python/console/intent_sample.py @@ -124,19 +124,6 @@ def recognize_intent_once_from_file(): def recognize_intent_once_async_from_mic(): - """performs one-shot asynchronous intent recognition from input from the default microphone""" - # Set up the config for the intent recognizer - intent_config = speechsdk.SpeechConfig(subscription=intent_key, region=intent_service_region) - audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True) - - # Set up the intent recognizer - intent_recognizer = speechsdk.intent.IntentRecognizer(speech_config=intent_config, audio_config=audio_config) - - # Add callbacks to the recognition events - - # Set up a flag to mark when asynchronous recognition is done - done = False - def recognized_callback(evt: speechsdk.intent.IntentRecognitionEventArgs): """ Callback that is called on successful recognition of a full utterance by both speech @@ -161,6 +148,19 @@ def recognizing_callback(evt: speechsdk.intent.IntentRecognitionEventArgs): result = evt.result print("Intermediate transcription: \"{}\"".format(result.text)) + """performs one-shot asynchronous intent recognition from input from the default microphone""" + # Set up the config for the intent recognizer + intent_config = 
speechsdk.SpeechConfig(subscription=intent_key, region=intent_service_region) + audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True) + + # Set up the intent recognizer + intent_recognizer = speechsdk.intent.IntentRecognizer(speech_config=intent_config, audio_config=audio_config) + + # Add callbacks to the recognition events + + # Set up a flag to mark when asynchronous recognition is done + done = False + # Connect the callbacks intent_recognizer.recognized.connect(recognized_callback) intent_recognizer.canceled.connect(canceled_callback) @@ -192,6 +192,12 @@ def recognizing_callback(evt: speechsdk.intent.IntentRecognitionEventArgs): def recognize_intent_continuous(): + def stop_cb(evt: speechsdk.SessionEventArgs): + """callback that signals to stop continuous recognition upon receiving an event `evt`""" + print('CLOSING on {}'.format(evt)) + nonlocal done + done = True + """performs continuous intent recognition from input from an audio file""" # intent_config = speechsdk.SpeechConfig(subscription=intent_key, region=intent_service_region) @@ -215,12 +221,6 @@ def recognize_intent_continuous(): # Connect callback functions to the signals the intent recognizer fires. 
done = False - def stop_cb(evt: speechsdk.SessionEventArgs): - """callback that signals to stop continuous recognition upon receiving an event `evt`""" - print('CLOSING on {}'.format(evt)) - nonlocal done - done = True - intent_recognizer.session_started.connect(lambda evt: print("SESSION_START: {}".format(evt))) intent_recognizer.speech_end_detected.connect(lambda evt: print("SPEECH_END_DETECTED: {}".format(evt))) # event for intermediate results diff --git a/samples/python/console/speech_language_detection_sample.py b/samples/python/console/speech_language_detection_sample.py index e645073b2..fd39b87ae 100644 --- a/samples/python/console/speech_language_detection_sample.py +++ b/samples/python/console/speech_language_detection_sample.py @@ -116,28 +116,6 @@ def speech_language_detection_once_from_file(): def speech_language_detection_once_from_continuous(): - """performs continuous speech language detection with input from an audio file""" - # - # Creates an AutoDetectSourceLanguageConfig, which defines a number of possible spoken languages - auto_detect_source_language_config = \ - speechsdk.languageconfig.AutoDetectSourceLanguageConfig(languages=["zh-CN", "en-US"]) - - # Creates a SpeechConfig from your speech key and region - speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) - - # Set continuous language detection (override the default of "AtStart") - speech_config.set_property( - property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode, value='Continuous') - - audio_config = speechsdk.audio.AudioConfig(filename=multilingual_wav_file) - - source_language_recognizer = speechsdk.SourceLanguageRecognizer( - speech_config=speech_config, - auto_detect_source_language_config=auto_detect_source_language_config, - audio_config=audio_config) - - done = False - def stop_cb(evt: speechsdk.SessionEventArgs): """callback that signals to stop continuous recognition upon receiving an event `evt`""" print('CLOSING on 
{}'.format(evt)) @@ -171,6 +149,28 @@ def audio_recognized(evt: speechsdk.SpeechRecognitionEventArgs): global language_detected language_detected = True + """performs continuous speech language detection with input from an audio file""" + # + # Creates an AutoDetectSourceLanguageConfig, which defines a number of possible spoken languages + auto_detect_source_language_config = \ + speechsdk.languageconfig.AutoDetectSourceLanguageConfig(languages=["zh-CN", "en-US"]) + + # Creates a SpeechConfig from your speech key and region + speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) + + # Set continuous language detection (override the default of "AtStart") + speech_config.set_property( + property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode, value='Continuous') + + audio_config = speechsdk.audio.AudioConfig(filename=multilingual_wav_file) + + source_language_recognizer = speechsdk.SourceLanguageRecognizer( + speech_config=speech_config, + auto_detect_source_language_config=auto_detect_source_language_config, + audio_config=audio_config) + + done = False + # Connect callbacks to the events fired by the speech recognizer source_language_recognizer.recognized.connect(audio_recognized) source_language_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt))) diff --git a/samples/python/console/speech_sample.py b/samples/python/console/speech_sample.py index a47f2f335..d256231b6 100644 --- a/samples/python/console/speech_sample.py +++ b/samples/python/console/speech_sample.py @@ -407,20 +407,6 @@ def stop_cb(evt: speechsdk.SessionEventArgs): # def speech_recognize_keyword_from_microphone(): - """performs keyword-triggered speech recognition with input microphone""" - speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) - - # Creates an instance of a keyword recognition model. Update this to - # point to the location of your keyword recognition model. 
- model = speechsdk.KeywordRecognitionModel("YourKeywordRecognitionModelFile.table") - - # The phrase your keyword recognition model triggers on. - keyword = "YourKeyword" - - speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config) - - done = False - def stop_cb(evt: speechsdk.SessionEventArgs): """callback that signals to stop continuous recognition upon receiving an event `evt`""" print('CLOSING on {}'.format(evt)) @@ -443,6 +429,20 @@ def recognized_cb(evt: speechsdk.SpeechRecognitionEventArgs): elif evt.result.reason == speechsdk.ResultReason.NoMatch: print('NOMATCH: {}'.format(evt)) + """performs keyword-triggered speech recognition with input microphone""" + speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) + + # Creates an instance of a keyword recognition model. Update this to + # point to the location of your keyword recognition model. + model = speechsdk.KeywordRecognitionModel("YourKeywordRecognitionModelFile.table") + + # The phrase your keyword recognition model triggers on. 
+ keyword = "YourKeyword" + + speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config) + + done = False + # Connect callbacks to the events fired by the speech recognizer speech_recognizer.recognizing.connect(recognizing_cb) speech_recognizer.recognized.connect(recognized_cb) @@ -493,6 +493,12 @@ def close(self): """close callback function""" self._file_h.close() + def stop_cb(evt: speechsdk.SessionEventArgs): + """callback that signals to stop continuous recognition upon receiving an event `evt`""" + print('CLOSING on {}'.format(evt)) + nonlocal done + done = True + speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) # specify the audio format @@ -509,12 +515,6 @@ def close(self): done = False - def stop_cb(evt: speechsdk.SessionEventArgs): - """callback that signals to stop continuous recognition upon receiving an event `evt`""" - print('CLOSING on {}'.format(evt)) - nonlocal done - done = True - # Connect callbacks to the events fired by the speech recognizer speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt))) speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt))) @@ -553,6 +553,11 @@ def push_stream_writer(stream): def speech_recognition_with_push_stream(): + def session_stopped_cb(evt): + """callback that signals to stop continuous recognition upon receiving an event `evt`""" + print('SESSION STOPPED: {}'.format(evt)) + recognition_done.set() + """gives an example how to use a push audio stream to recognize speech from a custom audio source""" speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) @@ -566,10 +571,6 @@ def speech_recognition_with_push_stream(): recognition_done = threading.Event() # Connect callbacks to the events fired by the speech recognizer - def session_stopped_cb(evt): - """callback that signals to stop continuous recognition upon receiving an event `evt`""" - print('SESSION STOPPED: 
{}'.format(evt)) - recognition_done.set() speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt))) speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt))) @@ -653,20 +654,6 @@ def speech_recognize_with_auto_language_detection_UsingCustomizedModel(): def speech_recognize_keyword_locally_from_microphone(): - """runs keyword spotting locally, with direct access to the result audio""" - - # Creates an instance of a keyword recognition model. Update this to - # point to the location of your keyword recognition model. - model = speechsdk.KeywordRecognitionModel("YourKeywordRecognitionModelFile.table") - - # The phrase your keyword recognition model triggers on. - keyword = "YourKeyword" - - # Create a local keyword recognizer with the default microphone device for input. - keyword_recognizer = speechsdk.KeywordRecognizer() - - done = False - def recognized_cb(evt: speechsdk.SpeechRecognitionEventArgs): # Only a keyword phrase is recognized. The result cannot be 'NoMatch' # and there is no timeout. The recognizer runs until a keyword phrase @@ -685,6 +672,20 @@ def canceled_cb(evt: speechsdk.SpeechRecognitionCanceledEventArgs): nonlocal done done = True + """runs keyword spotting locally, with direct access to the result audio""" + + # Creates an instance of a keyword recognition model. Update this to + # point to the location of your keyword recognition model. + model = speechsdk.KeywordRecognitionModel("YourKeywordRecognitionModelFile.table") + + # The phrase your keyword recognition model triggers on. + keyword = "YourKeyword" + + # Create a local keyword recognizer with the default microphone device for input. + keyword_recognizer = speechsdk.KeywordRecognizer() + + done = False + # Connect callbacks to the events fired by the keyword recognizer. 
keyword_recognizer.recognized.connect(recognized_cb) keyword_recognizer.canceled.connect(canceled_cb) @@ -788,6 +789,27 @@ def pronunciation_assessment_continuous_from_file(): import difflib import json + def stop_cb(evt: speechsdk.SessionEventArgs): + """callback that signals to stop continuous recognition upon receiving an event `evt`""" + print('CLOSING on {}'.format(evt)) + nonlocal done + done = True + + def recognized(evt: speechsdk.SpeechRecognitionEventArgs): + print('pronunciation assessment for: {}'.format(evt.result.text)) + pronunciation_result = speechsdk.PronunciationAssessmentResult(evt.result) + print(' Accuracy score: {}, pronunciation score: {}, completeness score : {}, fluency score: {}'.format( + pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score, + pronunciation_result.completeness_score, pronunciation_result.fluency_score + )) + nonlocal recognized_words, fluency_scores, durations + recognized_words += pronunciation_result.words + fluency_scores.append(pronunciation_result.fluency_score) + json_result = evt.result.properties.get(speechsdk.PropertyId.SpeechServiceResponse_JsonResult) + jo = json.loads(json_result) + nb = jo['NBest'][0] + durations.append(sum([int(w['Duration']) for w in nb['Words']])) + # Creates an instance of a speech config with specified subscription key and service region. # Replace with your own subscription key and service region (e.g., "westus"). # Note: The sample is for en-US language. 
@@ -814,27 +836,6 @@ def pronunciation_assessment_continuous_from_file(): fluency_scores = [] durations = [] - def stop_cb(evt: speechsdk.SessionEventArgs): - """callback that signals to stop continuous recognition upon receiving an event `evt`""" - print('CLOSING on {}'.format(evt)) - nonlocal done - done = True - - def recognized(evt: speechsdk.SpeechRecognitionEventArgs): - print('pronunciation assessment for: {}'.format(evt.result.text)) - pronunciation_result = speechsdk.PronunciationAssessmentResult(evt.result) - print(' Accuracy score: {}, pronunciation score: {}, completeness score : {}, fluency score: {}'.format( - pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score, - pronunciation_result.completeness_score, pronunciation_result.fluency_score - )) - nonlocal recognized_words, fluency_scores, durations - recognized_words += pronunciation_result.words - fluency_scores.append(pronunciation_result.fluency_score) - json_result = evt.result.properties.get(speechsdk.PropertyId.SpeechServiceResponse_JsonResult) - jo = json.loads(json_result) - nb = jo['NBest'][0] - durations.append(sum([int(w['Duration']) for w in nb['Words']])) - # Connect callbacks to the events fired by the speech recognizer speech_recognizer.recognized.connect(recognized) speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt))) diff --git a/samples/python/console/transcription_sample.py b/samples/python/console/transcription_sample.py index a354281dc..6a38e328f 100644 --- a/samples/python/console/transcription_sample.py +++ b/samples/python/console/transcription_sample.py @@ -39,6 +39,12 @@ # Differentiation of speakers do not require voice signatures. 
In case more enhanced speaker identification is required, # please use https://signature.centralus.cts.speech.microsoft.com/UI/index.html REST API to create your own voice signatures def conversation_transcription_differentiate_speakers(): + def stop_cb(evt: speechsdk.SessionEventArgs): + """callback that signals to stop continuous transcription upon receiving an event `evt`""" + print('CLOSING {}'.format(evt)) + nonlocal done + done = True + """differentiates speakers using conversation transcription service""" # Creates speech configuration with subscription information speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region) @@ -61,12 +67,6 @@ def conversation_transcription_differentiate_speakers(): done = False - def stop_cb(evt: speechsdk.SessionEventArgs): - """callback that signals to stop continuous transcription upon receiving an event `evt`""" - print('CLOSING {}'.format(evt)) - nonlocal done - done = True - # Subscribe to the events fired by the conversation transcriber transcriber.transcribed.connect(lambda evt: print('TRANSCRIBED: {}'.format(evt))) transcriber.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt))) diff --git a/samples/python/console/translation_sample.py b/samples/python/console/translation_sample.py index 66a654932..d142b9003 100644 --- a/samples/python/console/translation_sample.py +++ b/samples/python/console/translation_sample.py @@ -113,26 +113,11 @@ def translation_once_from_file(): def translation_continuous(): - """performs continuous speech translation from an audio file""" - # - # set up translation parameters: source language and target languages - translation_config = speechsdk.translation.SpeechTranslationConfig( - subscription=speech_key, region=service_region, - speech_recognition_language='en-US', - target_languages=('de', 'fr'), voice_name="de-DE-KatjaNeural") - audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename) - - # Creates a translation 
recognizer using and audio file as input. - recognizer = speechsdk.translation.TranslationRecognizer( - translation_config=translation_config, audio_config=audio_config) - def result_callback(event_type: str, evt: speechsdk.translation.TranslationRecognitionEventArgs): """callback to display a translation result""" print("{}:\n {}\n\tTranslations: {}\n\tResult Json: {}\n".format( event_type, evt, evt.result.translations.items(), evt.result.json)) - done = False - def stop_cb(evt: speechsdk.SessionEventArgs): """callback that signals to stop continuous recognition upon receiving an event `evt`""" print('CLOSING on {}'.format(evt)) @@ -143,6 +128,28 @@ def canceled_cb(evt: speechsdk.translation.TranslationRecognitionCanceledEventAr print('CANCELED:\n\tReason:{}\n'.format(evt.result.reason)) print('\tDetails: {} ({})'.format(evt, evt.result.cancellation_details.error_details)) + def synthesis_callback(evt: speechsdk.translation.TranslationRecognitionEventArgs): + """ + callback for the synthesis event + """ + print('SYNTHESIZING {}\n\treceived {} bytes of audio. Reason: {}'.format( + evt, len(evt.result.audio), evt.result.reason)) + + """performs continuous speech translation from an audio file""" + # + # set up translation parameters: source language and target languages + translation_config = speechsdk.translation.SpeechTranslationConfig( + subscription=speech_key, region=service_region, + speech_recognition_language='en-US', + target_languages=('de', 'fr'), voice_name="de-DE-KatjaNeural") + audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename) + + # Creates a translation recognizer using an audio file as input. 
+ recognizer = speechsdk.translation.TranslationRecognizer( + translation_config=translation_config, audio_config=audio_config) + + done = False + # connect callback functions to the events fired by the recognizer recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt))) recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt))) @@ -157,13 +164,6 @@ def canceled_cb(evt: speechsdk.translation.TranslationRecognitionCanceledEventAr recognizer.session_stopped.connect(stop_cb) recognizer.canceled.connect(stop_cb) - def synthesis_callback(evt: speechsdk.translation.TranslationRecognitionEventArgs): - """ - callback for the synthesis event - """ - print('SYNTHESIZING {}\n\treceived {} bytes of audio. Reason: {}'.format( - evt, len(evt.result.audio), evt.result.reason)) - # connect callback to the synthesis event recognizer.synthesizing.connect(synthesis_callback) @@ -229,6 +229,34 @@ def translation_once_with_lid_from_file(): def translation_continuous_with_lid_from_multilingual_file(): + def result_callback(evt): + """callback to display a translation result""" + if evt.result.reason == speechsdk.ResultReason.TranslatedSpeech: + src_lang = evt.result.properties[speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult] + print("""Recognized: + Detected language: {} + Recognition result: {} + German translation: {} + French translation: {}""".format( + src_lang, + evt.result.text, + evt.result.translations['de'], + evt.result.translations['fr'])) + elif evt.result.reason == speechsdk.ResultReason.RecognizedSpeech: + print("Recognized:\n {}".format(evt.result.text)) + elif evt.result.reason == speechsdk.ResultReason.NoMatch: + print("No speech could be recognized: {}".format(evt.result.no_match_details)) + elif evt.result.reason == speechsdk.ResultReason.Canceled: + print("Translation canceled: {}".format(evt.result.cancellation_details.reason)) + if evt.result.cancellation_details.reason == 
speechsdk.CancellationReason.Error: + print("Error details: {}".format(evt.result.cancellation_details.error_details)) + + def stop_cb(evt): + """callback that signals to stop continuous recognition upon receiving an event `evt`""" + print('CLOSING on {}'.format(evt)) + nonlocal done + done = True + """performs continuous speech translation from a multi-lingual audio file, with continuous language identification""" # @@ -258,36 +286,8 @@ def translation_continuous_with_lid_from_multilingual_file(): audio_config=audio_config, auto_detect_source_language_config=auto_detect_source_language_config) - def result_callback(evt): - """callback to display a translation result""" - if evt.result.reason == speechsdk.ResultReason.TranslatedSpeech: - src_lang = evt.result.properties[speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult] - print("""Recognized: - Detected language: {} - Recognition result: {} - German translation: {} - French translation: {}""".format( - src_lang, - evt.result.text, - evt.result.translations['de'], - evt.result.translations['fr'])) - elif evt.result.reason == speechsdk.ResultReason.RecognizedSpeech: - print("Recognized:\n {}".format(evt.result.text)) - elif evt.result.reason == speechsdk.ResultReason.NoMatch: - print("No speech could be recognized: {}".format(evt.result.no_match_details)) - elif evt.result.reason == speechsdk.ResultReason.Canceled: - print("Translation canceled: {}".format(evt.result.cancellation_details.reason)) - if evt.result.cancellation_details.reason == speechsdk.CancellationReason.Error: - print("Error details: {}".format(evt.result.cancellation_details.error_details)) - done = False - def stop_cb(evt): - """callback that signals to stop continuous recognition upon receiving an event `evt`""" - print('CLOSING on {}'.format(evt)) - nonlocal done - done = True - # connect callback functions to the events fired by the recognizer recognizer.session_started.connect(lambda evt: print('SESSION STARTED: 
{}'.format(evt))) recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))