Skip to content

Commit

Permalink
put azure asr into its own function
Browse files Browse the repository at this point in the history
  • Loading branch information
SanderGi committed Nov 7, 2023
1 parent cdf0dd2 commit 68a314d
Showing 1 changed file with 68 additions and 65 deletions.
133 changes: 68 additions & 65 deletions daras_ai_v2/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,71 +337,7 @@ def run_asr(
is_short = size < SHORT_FILE_CUTOFF

if selected_model == AsrModels.azure:
# transcription from audio url only supported via rest api or cli
# Start by initializing a request
payload = {
"contentUrls": [
audio_url,
],
"displayName": "Gooey Transcription",
"model": None,
"locale": language or "en-US",
"properties": {
"wordLevelTimestampsEnabled": False,
"languageIdentification": {
"candidateLocales": [ # 2-10 locales and one of them must be the actual locale
"es-ES",
"zh-CN",
"da-DK",
language or "en-US",
]
},
},
}
r = requests.post(
f"https://{settings.AZURE_SPEECH_REGION}.api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions",
headers={
"Ocp-Apim-Subscription-Key": settings.AZURE_SPEECH_KEY,
"Content-Type": "application/json",
},
json=payload,
)
r.raise_for_status()
uri = r.json()["self"]

# poll for results
for _ in range(MAX_POLLS):
r = requests.get(
uri,
headers={
"Ocp-Apim-Subscription-Key": settings.AZURE_SPEECH_KEY,
},
)
if not r.ok or not r.json()["status"] == "Succeeded":
sleep(1)
continue
r = requests.get(
uri + "/files",
headers={
"Ocp-Apim-Subscription-Key": settings.AZURE_SPEECH_KEY,
},
)
r.raise_for_status()
transcriptions = []
for value in r.json()["values"]:
if value["kind"] == "Transcription":
r = requests.get(
value["links"]["contentUrl"],
headers={
"Ocp-Apim-Subscription-Key": settings.AZURE_SPEECH_KEY
},
)
r.raise_for_status()
transcriptions += [
r.json()["combinedRecognizedPhrases"][0]["display"]
]
return "\n".join(transcriptions)
assert False, "Max polls exceeded, Azure speech did not yield a response"
return azure_asr(audio_url, language)
elif selected_model == AsrModels.deepgram:
r = requests.post(
"https://api.deepgram.com/v1/listen",
Expand Down Expand Up @@ -582,6 +518,73 @@ def _get_or_create_recognizer(
return recognizer


@redis_cache_decorator
def azure_asr(
    audio_url: str,
    language: str = "en-US",
    candidate_languages: "list[str] | None" = None,
):
    """Transcribe the audio at ``audio_url`` with Azure batch transcription.

    Transcription from an audio URL is only supported via the REST API (or
    the CLI), so this submits a job to the Speech-to-text v3.1
    ``/transcriptions`` endpoint, polls the job until it finishes, then
    downloads every result file of kind ``"Transcription"``.

    Args:
        audio_url: URL of the audio, passed to Azure as the only entry of
            ``contentUrls``.
        language: Locale of the audio; a falsy value falls back to ``"en-US"``.
        candidate_languages: Additional locales offered to language
            identification. ``None`` selects ``["es-ES", "zh-CN", "da-DK"]``.
            The resolved ``language`` is always appended to this list.

    Returns:
        The ``display`` text of the first combined recognized phrase of each
        transcription file, joined with newlines.

    Raises:
        requests.HTTPError: If creating the job or fetching results fails.
        AssertionError: If Azure reports the job as ``"Failed"``, or if the
            job has not succeeded after ``MAX_POLLS`` polls.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if candidate_languages is None:
        candidate_languages = ["es-ES", "zh-CN", "da-DK"]
    locale = language or "en-US"
    auth_headers = {"Ocp-Apim-Subscription-Key": settings.AZURE_SPEECH_KEY}

    # Start by initializing a request
    payload = {
        "contentUrls": [audio_url],
        "displayName": "Gooey Transcription",
        "model": None,
        "locale": locale,
        "properties": {
            "wordLevelTimestampsEnabled": False,
            "languageIdentification": {
                # 2-10 locales and one of them must be the actual locale
                "candidateLocales": [*candidate_languages, locale],
            },
        },
    }
    r = requests.post(
        f"https://{settings.AZURE_SPEECH_REGION}.api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions",
        headers={**auth_headers, "Content-Type": "application/json"},
        json=payload,
    )
    r.raise_for_status()
    uri = r.json()["self"]

    # poll for results
    for _ in range(MAX_POLLS):
        r = requests.get(uri, headers=auth_headers)
        if not r.ok:
            # Non-2xx poll responses are treated as transient: wait and retry.
            sleep(1)
            continue
        job = r.json()
        status = job["status"]
        if status == "Failed":
            # Fail fast instead of burning the remaining polls on a job that
            # can no longer succeed, and surface the service's own error.
            error = (job.get("properties") or {}).get("error")
            raise AssertionError(f"Azure speech transcription failed: {error}")
        if status != "Succeeded":
            sleep(1)
            continue

        r = requests.get(job["links"]["files"], headers=auth_headers)
        r.raise_for_status()
        transcriptions = []
        for value in r.json()["values"]:
            # Skip every result file that is not of kind "Transcription".
            if value["kind"] != "Transcription":
                continue
            r = requests.get(value["links"]["contentUrl"], headers=auth_headers)
            r.raise_for_status()
            transcriptions.append(r.json()["combinedRecognizedPhrases"][0]["display"])
        return "\n".join(transcriptions)

    # Explicit raise rather than `assert False`: asserts are stripped under
    # `python -O`, which would make this function silently return None.
    # The exception type is kept as AssertionError for existing callers.
    raise AssertionError("Max polls exceeded, Azure speech did not yield a response")


# 16kHz, 16-bit, mono
FFMPEG_WAV_ARGS = ["-vn", "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000"]

Expand Down

0 comments on commit 68a314d

Please sign in to comment.