From 71d86a8851a4a948836fccea05ada89668428d1a Mon Sep 17 00:00:00 2001
From: Markus Hennerbichler
Date: Sat, 24 Feb 2024 18:58:31 +0000
Subject: [PATCH] Add microphone realtime example

---
 examples/README.md                     |  8 +++--
 examples/transcribe_from_microphone.py | 49 ++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 examples/transcribe_from_microphone.py

diff --git a/examples/README.md b/examples/README.md
index c3c3be0..f1c648f 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,6 +1,8 @@
 # Examples
 
-This folder provides some examples of how the Speechmatics python client can be used to build different systems. The current examples include:
+This folder provides some examples of how the Speechmatics python client can be used to build different systems.
+Each of the examples should have a separate README with all the necessary steps to get them up and running.
+The current examples include:
 
 1. [notification_flow](./notification_flow/README.md) (webhooks)
 
@@ -18,4 +20,6 @@ Demonstrates how to run a websocket server that acts as a proxy to a speechmatic
 
 Tools for batch synchronising a local folder of audio into a searchable database of transcriptions.
 
-Each of the examples should have a separate README with all the necessary steps to get them up and running.
+5. [Microphone transcription example](./transcribe_from_microphone.py)
+
+This shows how the `sounddevice` python package can be used to stream audio from a microphone to Speechmatics.
diff --git a/examples/transcribe_from_microphone.py b/examples/transcribe_from_microphone.py
new file mode 100644
index 0000000..9144754
--- /dev/null
+++ b/examples/transcribe_from_microphone.py
@@ -0,0 +1,49 @@
+import speechmatics
+import speechmatics.models
+import speechmatics.client
+import speechmatics.cli
+import asyncio
+import argparse
+import sys
+import sounddevice as sd
+
+class RawInputStreamWrapper:
+    def __init__(self, wrapped: sd.RawInputStream):
+        self.wrapped: sd.RawInputStream = wrapped
+
+    def read(self, frames):
+        return bytes(self.wrapped.read(frames)[0])
+
+
+async def transcribe_from_device(device, speechmatics_client, language: str):
+    frame_rate = 44_100
+    with sd.RawInputStream(device=device, channels=1, samplerate=frame_rate, dtype='float32') as stream:
+        settings = speechmatics.models.AudioSettings(
+            sample_rate=frame_rate,
+            chunk_size=1024 * 4,
+            encoding="pcm_f32" + ("le" if sys.byteorder == "little" else "be"),
+        )
+        # Define transcription parameters
+        conf = speechmatics.models.TranscriptionConfig(language=language, operating_point="enhanced", max_delay=2, enable_partials=True, enable_entities=True)
+        await speechmatics_client.run(RawInputStreamWrapper(stream), conf, settings)
+
+def main(args):
+    speechmatics_client = speechmatics.client.WebsocketClient(connection_settings_or_auth_token=args.auth_token)
+    transcripts = speechmatics.cli.Transcripts(text="", json=[])
+    speechmatics.cli.add_printing_handlers(speechmatics_client, transcripts)
+
+    asyncio.run(transcribe_from_device(args.device, speechmatics_client, args.language))
+
+def int_or_str(text):
+    try:
+        return int(text)
+    except ValueError:
+        return text
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Speechmatics Microphone Realtime Transcription example')
+    parser.add_argument('-d', '--device', type=int_or_str, help='input device (numeric ID or substring)')
+    parser.add_argument('-a', '--auth_token', type=str, required=True)
+    parser.add_argument('-l', '--language', type=str, default='en')
+
+    main(parser.parse_args())
\ No newline at end of file
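Note (not part of the patch above): the example's `--device` flag takes a numeric ID or a device-name substring, as accepted by `sounddevice`. A minimal sketch for listing the available devices with the `sounddevice` package before running the script:

```python
import sounddevice as sd

# Print the devices PortAudio can see; the leading index (or a unique
# substring of the device name) is what the example's --device flag expects.
print(sd.query_devices())
```

With a device picked, the script from the patch can then be run as, for example, `python transcribe_from_microphone.py --auth_token <API_KEY> --device 1` (the token and device ID here are placeholders).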