Skip to content

Commit

Permalink
Merge pull request #133 from SaarthShah/main
Browse files Browse the repository at this point in the history
added test cases for features & testing for JSON structure
  • Loading branch information
SaarthShah authored Aug 14, 2023
2 parents 9574006 + a69d96c commit b69aa0e
Showing 1 changed file with 168 additions and 0 deletions.
168 changes: 168 additions & 0 deletions tests/test_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,178 @@ async def test_transcribe_prerecorded_file():
response = await deepgram.transcription.prerecorded({"buffer": audio, "mimetype": "audio/wav"})
assert "results" in response


def test_prerecorded_json_structure():
    """
    Verify the JSON structure of a prerecorded transcription response.

    The request uses the "nova" model with smart formatting enabled. The
    structure asserted here should be consistent across all outputs unless
    there is a breaking change in the API itself.
    """
    response = deepgram.transcription.sync_prerecorded(
        {"url": AUDIO_URL},
        {
            "model": "nova",
            "smart_format": True,
        },
    )

    # The base response holds exactly the results and metadata objects.
    assert set(response.keys()) == {"results", "metadata"}

    # The metadata object contains exactly the expected keys.
    assert set(response["metadata"].keys()) == {
        "channels",
        "created",
        "duration",
        "model_info",
        "models",
        "request_id",
        "sha256",
        "transaction_key",
    }

    # The results object contains only the channels key.
    results = response["results"]
    assert list(results.keys()) == ["channels"]

    # Exactly one channel is returned.
    channels = results["channels"]
    assert len(channels) == 1

    # The channel object contains only the alternatives key.
    assert list(channels[0].keys()) == ["alternatives"]

    # Exactly one alternative is returned.
    alternatives = channels[0]["alternatives"]
    assert len(alternatives) == 1

    # The alternative contains exactly the expected keys.
    alternative = alternatives[0]
    assert set(alternative.keys()) == {
        "transcript",
        "confidence",
        "words",
        "paragraphs",
    }

    # Each field of the alternative has the expected type.
    assert isinstance(alternative["transcript"], str)
    assert isinstance(alternative["confidence"], float)
    assert isinstance(alternative["words"], list)
    assert isinstance(alternative["paragraphs"], dict)

    # The paragraphs object carries its own transcript and paragraph list.
    paragraphs = alternative["paragraphs"]
    assert isinstance(paragraphs["transcript"], str)
    assert isinstance(paragraphs["paragraphs"], list)

    # Fail with a readable assertion (not an IndexError) if no paragraph
    # came back, then check the keys of the first paragraph.
    assert paragraphs["paragraphs"], "expected at least one paragraph"
    assert set(paragraphs["paragraphs"][0].keys()) == {
        "sentences",
        "num_words",
        "start",
        "end",
    }


def test_diarization():
    """
    Verify the diarization output of the Deepgram response.

    With "diarize" enabled, the first paragraph and the first word are
    expected to carry speaker fields alongside their usual keys.
    """
    source = {"url": AUDIO_URL}
    options = {
        "model": "nova",
        "smart_format": True,
        "diarize": True,
    }
    response = deepgram.transcription.sync_prerecorded(source, options)

    alternative = response["results"]["channels"][0]["alternatives"][0]

    # Keys of the first paragraph object, including the speaker label.
    first_paragraph = alternative["paragraphs"]["paragraphs"][0]
    assert set(first_paragraph.keys()) == {
        "sentences",
        "speaker",
        "num_words",
        "start",
        "end",
    }

    # Keys of the first word object, including the speaker fields.
    first_word = alternative["words"][0]
    assert set(first_word.keys()) == {
        "word",
        "start",
        "end",
        "confidence",
        "speaker",
        "speaker_confidence",
        "punctuated_word",
    }


def test_summarize():
    """
    Verify the summarize output of the Deepgram response, especially for
    summarize v2.

    The summary object is read from response["results"]["summary"] and must
    contain a "result" status equal to "success" and a "short" summary string.
    """
    response = deepgram.transcription.sync_prerecorded(
        {"url": AUDIO_URL},
        {
            "model": "nova",
            "smart_format": True,
            "summarize": "v2",
        },
    )

    summary = response["results"]["summary"]

    # The summary object has exactly the expected keys.
    assert set(summary.keys()) == {"result", "short"}

    # The result status is a string ...
    assert isinstance(summary["result"], str)

    # ... and reports that the summarization succeeded.
    assert summary["result"] == "success"

    # The short summary is a string.
    assert isinstance(summary["short"], str)


def test_topic_detection():
    """
    Verify the topic detection output of the Deepgram response.

    With "detect_topics" enabled, the first alternative is expected to carry
    a "topics" list whose entries are dicts with a nested "topics" list.
    """
    response = deepgram.transcription.sync_prerecorded(
        {"url": AUDIO_URL},
        {
            "model": "nova",
            "smart_format": True,
            "detect_topics": True,
        },
    )

    # Indexing "topics" also proves the key is present on alternatives[0].
    topics = response["results"]["channels"][0]["alternatives"][0]["topics"]
    assert isinstance(topics, list)

    # Fail with a readable assertion (not an IndexError) if nothing came back.
    assert topics, "expected at least one detected topic segment"

    # The first topics entry is a dict.
    first_topic = topics[0]
    assert isinstance(first_topic, dict)

    # The first topics entry contains exactly the expected keys.
    assert set(first_topic.keys()) == {
        "text",
        "start_word",
        "end_word",
        "topics",
    }

    # The nested topics value is a list.
    assert isinstance(first_topic["topics"], list)


def test_detect_language():
    """
    Verify the language detection output of the Deepgram response.

    With "detect_language" enabled, the first channel is expected to report
    a "detected_language" string, which must be "en" for the test audio.
    """
    response = deepgram.transcription.sync_prerecorded(
        {"url": AUDIO_URL},
        {
            "model": "nova",
            "detect_language": True,
        },
    )

    detected_language = response["results"]["channels"][0]["detected_language"]

    # The detected language is a string ...
    assert isinstance(detected_language, str)

    # ... and matches the language expected for the test audio.
    assert detected_language == "en"


def test_alternatives():
    """
    Verify that requesting two alternatives returns two alternatives for the
    first channel of the response.
    """
    source = {"url": AUDIO_URL}
    options = {
        "model": "nova",
        "alternatives": 2,
    }
    response = deepgram.transcription.sync_prerecorded(source, options)

    first_channel = response["results"]["channels"][0]
    assert len(first_channel["alternatives"]) == 2


def test_missing_api_key():
    """
    Verify that building a client from an empty options dict (no API key)
    raises DeepgramSetupError.
    """
    empty_options = {}
    with pytest.raises(DeepgramSetupError):
        Deepgram(empty_options)


def test_400_error():
    """
    Verify that a request rejected by the API surfaces as DeepgramApiError.

    NOTE(review): presumably the "ta" language is not supported by the "nova"
    model, which produces the HTTP 400 this test is named after — confirm.
    """
    source = {"url": AUDIO_URL}
    options = {"model": "nova", "language": "ta"}
    with pytest.raises(DeepgramApiError):
        deepgram.transcription.sync_prerecorded(source, options)

0 comments on commit b69aa0e

Please sign in to comment.