From 71d86a8851a4a948836fccea05ada89668428d1a Mon Sep 17 00:00:00 2001
From: Markus Hennerbichler
Date: Sat, 24 Feb 2024 18:58:31 +0000
Subject: [PATCH] Add microphone realtime example

---
 examples/README.md                     |  8 +++--
 examples/transcribe_from_microphone.py | 49 ++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 examples/transcribe_from_microphone.py

diff --git a/examples/README.md b/examples/README.md
index c3c3be0..f1c648f 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,6 +1,8 @@
 # Examples
 
-This folder provides some examples of how the Speechmatics python client can be used to build different systems. The current examples include:
+This folder provides some examples of how the Speechmatics python client can be used to build different systems.
+Each of the examples should have a separate README with all the necessary steps to get them up and running.
+The current examples include:
 
 1. [notification_flow](./notification_flow/README.md) (webhooks)
 
@@ -18,4 +20,6 @@ Demonstrates how to run a websocket server that acts as a proxy to a speechmatic
 
 Tools for batch synchronising a local folder of audio into a searchable database of transcriptions.
 
-Each of the examples should have a separate README with all the necessary steps to get them up and running.
+5. [Microphone transcription example](./transcribe_from_microphone.py)
+
+This shows how the `sounddevice` python package can be used to stream audio from a microphone to Speechmatics.
diff --git a/examples/transcribe_from_microphone.py b/examples/transcribe_from_microphone.py
new file mode 100644
index 0000000..9144754
--- /dev/null
+++ b/examples/transcribe_from_microphone.py
@@ -0,0 +1,49 @@
+import speechmatics
+import speechmatics.models
+import speechmatics.client
+import speechmatics.cli
+import asyncio
+import argparse
+import sys
+import sounddevice as sd
+
+class RawInputStreamWrapper:
+    def __init__(self, wrapped: sd.RawInputStream):
+        self.wrapped: sd.RawInputStream = wrapped
+
+    def read(self, frames):
+        return bytes(self.wrapped.read(frames)[0])
+
+
+async def transcribe_from_device(device, speechmatics_client, language: str):
+    frame_rate = 44_100
+    with sd.RawInputStream(device=device, channels=1, samplerate=frame_rate, dtype='float32') as stream:
+        settings = speechmatics.models.AudioSettings(
+            sample_rate=frame_rate,
+            chunk_size=1024 * 4,
+            encoding="pcm_f32" + ("le" if sys.byteorder == "little" else "be"),
+        )
+        # Define transcription parameters
+        conf = speechmatics.models.TranscriptionConfig(language=language, operating_point="enhanced", max_delay=2, enable_partials=True, enable_entities=True)
+        await speechmatics_client.run(RawInputStreamWrapper(stream), conf, settings)
+
+def main(args):
+    speechmatics_client = speechmatics.client.WebsocketClient(connection_settings_or_auth_token=args.auth_token)
+    transcripts = speechmatics.cli.Transcripts(text="", json=[])
+    speechmatics.cli.add_printing_handlers(speechmatics_client, transcripts)
+
+    asyncio.run(transcribe_from_device(args.device, speechmatics_client, args.language))
+
+def int_or_str(text):
+    try:
+        return int(text)
+    except ValueError:
+        return text
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Speechmatics Microphone Realtime Transcription example')
+    parser.add_argument('-d', '--device', type=int_or_str, help='input device (numeric ID or substring)')
+    parser.add_argument('-a', '--auth_token', type=str, required=True)
+    parser.add_argument('-l', '--language', type=str, default='en')
+
+    main(parser.parse_args())
\ No newline at end of file
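Note (not part of the patch above): the example's `--device` flag takes a numeric ID or a device-name substring, as accepted by `sounddevice`. A minimal sketch for listing the available devices with the `sounddevice` package before running the script:

```python
import sounddevice as sd

# Print the devices PortAudio can see; the leading index (or a unique
# substring of the device name) is what the example's --device flag expects.
print(sd.query_devices())
```

With a device picked, the script from the patch can then be run as, for example, `python transcribe_from_microphone.py --auth_token <API_KEY> --device 1` (the token and device ID here are placeholders).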