Skip to content

Commit

Permalink
feat(stt): remove interim_results and low_latency wss params
Browse files: browse the repository at this point in the history
  • Loading branch information
apaparazzi0329 committed May 15, 2024
1 parent 039324a commit 1a6db36
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 110 deletions.
3 changes: 1 addition & 2 deletions examples/microphone-speech-to-text.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ def recognize_using_weboscket(*args):
mycallback = MyRecognizeCallback()
speech_to_text.recognize_using_websocket(audio=audio_source,
content_type='audio/l16; rate=44100',
recognize_callback=mycallback,
interim_results=True)
recognize_callback=mycallback)

###############################################
#### Prepare for the recording using PyAudio ##
Expand Down
22 changes: 1 addition & 21 deletions ibm_watson/speech_to_text_v1_adapter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8

# (C) Copyright IBM Corp. 2018, 2021.
# (C) Copyright IBM Corp. 2018, 2024.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,7 +33,6 @@ def recognize_using_websocket(self,
customization_weight=None,
base_model_version=None,
inactivity_timeout=None,
interim_results=None,
keywords=None,
keywords_threshold=None,
max_alternatives=None,
Expand All @@ -55,7 +54,6 @@ def recognize_using_websocket(self,
split_transcript_at_phrase_end=None,
speech_detector_sensitivity=None,
background_audio_suppression=None,
low_latency=None,
character_insertion_bias=None,
**kwargs):
"""
Expand Down Expand Up @@ -271,22 +269,6 @@ def recognize_using_websocket(self,
* 1.0 suppresses all audio (no audio is transcribed).
The values increase on a monotonic curve. See [Background audio
suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
:param bool low_latency: (optional) If `true` for next-generation
`Multimedia` and `Telephony` models that support low latency, directs the
service to produce results even more quickly than it usually does.
Next-generation models produce transcription results faster than
previous-generation models. The `low_latency` parameter causes the models
to produce results even more quickly, though the results might be less
accurate when the parameter is used.
**Note:** The parameter is beta functionality. It is not available for
previous-generation `Broadband` and `Narrowband` models. It is available
only for some next-generation models.
* For a list of next-generation models that support low latency, see
[Supported language
models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
for next-generation models.
* For more information about the `low_latency` parameter, see [Low
latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
:param float character_insertion_bias: (optional) For next-generation
`Multimedia` and `Telephony` models, an indication of whether the service
is biased to recognize shorter or longer strings of characters when
Expand Down Expand Up @@ -355,7 +337,6 @@ def recognize_using_websocket(self,
'customization_weight': customization_weight,
'content_type': content_type,
'inactivity_timeout': inactivity_timeout,
'interim_results': interim_results,
'keywords': keywords,
'keywords_threshold': keywords_threshold,
'max_alternatives': max_alternatives,
Expand All @@ -375,7 +356,6 @@ def recognize_using_websocket(self,
'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
'speech_detector_sensitivity': speech_detector_sensitivity,
'background_audio_suppression': background_audio_suppression,
'low_latency': low_latency,
'character_insertion_bias': character_insertion_bias
}
options = {k: v for k, v in options.items() if v is not None}
Expand Down
19 changes: 9 additions & 10 deletions ibm_watson/websocket/recognize_listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,15 @@ def on_data(self, ws, message, message_type, fin):
# set of transcriptions and send them to the appropriate callbacks.
results = json_object.get('results')
if results:
if (self.options.get('interim_results') is True):
b_final = (results[0].get('final') is True)
alternatives = results[0].get('alternatives')
if alternatives:
hypothesis = alternatives[0].get('transcript')
transcripts = self.extract_transcripts(alternatives)
if b_final:
self.callback.on_transcription(transcripts)
if hypothesis:
self.callback.on_hypothesis(hypothesis)
b_final = (results[0].get('final') is True)
alternatives = results[0].get('alternatives')
if alternatives:
hypothesis = alternatives[0].get('transcript')
transcripts = self.extract_transcripts(alternatives)
if b_final:
self.callback.on_transcription(transcripts)
if hypothesis:
self.callback.on_hypothesis(hypothesis)
else:
final_transcript = []
for result in results:
Expand Down
77 changes: 0 additions & 77 deletions test/integration/test_speech_to_text_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,83 +118,6 @@ def on_data(self, data):
assert test_callback.data['results'][0]['alternatives'][0]
['transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '

def test_on_transcription_interim_results_false(self):

class MyRecognizeCallback(RecognizeCallback):

def __init__(self):
RecognizeCallback.__init__(self)
self.error = None
self.transcript = None

def on_error(self, error):
self.error = error

def on_transcription(self, transcript):
self.transcript = transcript

test_callback = MyRecognizeCallback()
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
audio_source = AudioSource(audio_file, False)
self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
interim_results=False, low_latency=False)
assert test_callback.error is None
assert test_callback.transcript is not None
assert test_callback.transcript[0][0]['transcript'] in ['isolated tornadoes ', 'isolated tornados ']
assert test_callback.transcript[1][0]['transcript'] == 'and heavy rain '

def test_on_transcription_interim_results_true(self):

class MyRecognizeCallback(RecognizeCallback):

def __init__(self):
RecognizeCallback.__init__(self)
self.error = None
self.transcript = None

def on_error(self, error):
self.error = error

def on_transcription(self, transcript):
self.transcript = transcript
assert transcript[0]['confidence'] is not None
assert transcript[0]['transcript'] is not None

test_callback = MyRecognizeCallback()
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
audio_source = AudioSource(audio_file, False)
self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
interim_results=True, low_latency=True)
assert test_callback.error is None
assert test_callback.transcript is not None
assert test_callback.transcript[0]['transcript'] == 'and heavy rain '

def test_on_transcription_interim_results_true_low_latency_false(self):

class MyRecognizeCallback(RecognizeCallback):

def __init__(self):
RecognizeCallback.__init__(self)
self.error = None
self.transcript = None

def on_error(self, error):
self.error = error

def on_transcription(self, transcript):
self.transcript = transcript
assert transcript[0]['confidence'] is not None
assert transcript[0]['transcript'] is not None

test_callback = MyRecognizeCallback()
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
audio_source = AudioSource(audio_file, False)
self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
interim_results=True, low_latency=False)
assert test_callback.error is None
assert test_callback.transcript is not None
assert test_callback.transcript[0]['transcript'] == 'and heavy rain '

def test_custom_grammars(self):
customization_id = None
for custom_model in self.custom_models.get('customizations'):
Expand Down

0 comments on commit 1a6db36

Please sign in to comment.