Add microphone realtime example

speechmatics · Feb 24, 2024 · 71d86a8 · 71d86a8
1 parent a9e3b31
commit 71d86a8
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 2 deletions.
diff --git a/examples/README.md b/examples/README.md
@@ -1,6 +1,8 @@
 # Examples
 
-This folder provides some examples of how the Speechmatics python client can be used to build different systems. The current examples include:
+This folder provides some examples of how the Speechmatics Python client can be used to build different systems.
+Each of the examples should have a separate README with all the necessary steps to get them up and running.
+The current examples include:
 
 1. [notification_flow](./notification_flow/README.md) (webhooks)
 
@@ -18,4 +20,6 @@ Demonstrates how to run a websocket server that acts as a proxy to a speechmatic
 
 Tools for batch synchronising a local folder of audio into a searchable database of transcriptions.
 
-Each of the examples should have a separate README with all the necessary steps to get them up and running.
+5. [Microphone transcription example](./transcribe_from_microphone.py)
+
+This shows how the `sounddevice` Python package can be used to stream audio from a microphone to Speechmatics.
diff --git a/examples/transcribe_from_microphone.py b/examples/transcribe_from_microphone.py
@@ -0,0 +1,49 @@
+import speechmatics
+import speechmatics.models
+import speechmatics.client
+import speechmatics.cli
+import asyncio
+import argparse
+import sys
+import sounddevice as sd
+
+class RawInputStreamWrapper:
+    """Adapt a ``sounddevice.RawInputStream`` to a simple ``read(frames)`` reader.
+
+    The wrapped stream's ``read`` result is indexed at position 0 to obtain
+    the audio buffer; this wrapper hands back only that buffer, as ``bytes``.
+    """
+
+    def __init__(self, wrapped: sd.RawInputStream):
+        """Keep a reference to the input stream that ``read`` delegates to."""
+        self.wrapped: sd.RawInputStream = wrapped
+
+    def read(self, frames):
+        """Read ``frames`` frames from the wrapped stream and return them as bytes."""
+        # Only element 0 of the result is used; the rest (presumably the
+        # overflow flag described in the sounddevice docs) is ignored.
+        audio_buffer = self.wrapped.read(frames)[0]
+        return bytes(audio_buffer)
+
+
+async def transcribe_from_device(device, speechmatics_client, language: str):
+    """Stream audio from an input device to Speechmatics for realtime transcription.
+
+    Args:
+        device: Input device, passed straight to ``sd.RawInputStream``.
+        speechmatics_client: Client whose ``run`` coroutine is awaited with
+            the wrapped audio stream, transcription config and audio settings.
+        language: Language code placed in the transcription config.
+    """
+    frame_rate = 44_100
+    # The stream captures native-endian float32 samples, so the encoding name
+    # announced to the service has to follow the host byte order.
+    byte_order_suffix = "le" if sys.byteorder == "little" else "be"
+    with sd.RawInputStream(
+        device=device,
+        channels=1,
+        samplerate=frame_rate,
+        dtype='float32',
+    ) as stream:
+        settings = speechmatics.models.AudioSettings(
+            sample_rate=frame_rate,
+            chunk_size=1024 * 4,
+            encoding="pcm_f32" + byte_order_suffix,
+        )
+        # Define transcription parameters.
+        # Use the caller-supplied language so the ``language`` argument
+        # actually takes effect instead of always transcribing as 'en'.
+        conf = speechmatics.models.TranscriptionConfig(
+            language=language,
+            operating_point="enhanced",
+            max_delay=2,
+            enable_partials=True,
+            enable_entities=True,
+        )
+        await speechmatics_client.run(RawInputStreamWrapper(stream), conf, settings)
+
+def main(args):
+    """Build a websocket client with printing handlers and transcribe the microphone.
+
+    Args:
+        args: Parsed command-line arguments; ``auth_token``, ``device`` and
+            ``language`` are read from it.
+    """
+    client = speechmatics.client.WebsocketClient(
+        connection_settings_or_auth_token=args.auth_token,
+    )
+    # Container handed to the CLI printing handlers registered on the client.
+    collected = speechmatics.cli.Transcripts(text="", json=[])
+    speechmatics.cli.add_printing_handlers(client, collected)
+
+    session = transcribe_from_device(args.device, client, args.language)
+    asyncio.run(session)
+
+def int_or_str(text):
+    """Return ``text`` converted to ``int`` when possible, otherwise unchanged."""
+    try:
+        value = int(text)
+    except ValueError:
+        # Not an integer literal: keep the original text as-is.
+        value = text
+    return value
+
+if __name__ == "__main__":
+    # Script entry point: collect the command-line options and pass them to main().
+    cli_parser = argparse.ArgumentParser(
+        description='Speechmatics Microphone Realtime Transcription example',
+    )
+    cli_parser.add_argument(
+        '-d', '--device',
+        type=int_or_str,
+        help='input device (numeric ID or substring)',
+    )
+    cli_parser.add_argument('-a', '--auth_token', type=str, required=True)
+    cli_parser.add_argument('-l', '--language', type=str, default='en')
+
+    parsed_args = cli_parser.parse_args()
+    main(parsed_args)