From 5ea89a92f219e6455f38540d81228326f9e19b37 Mon Sep 17 00:00:00 2001 From: Tvrtko Sternak Date: Fri, 3 Jan 2025 11:13:37 +0100 Subject: [PATCH 1/6] Init demo project --- .codespell-whitelist.txt | 0 .env.example | 1 + .pre-commit-config.yaml | 51 +++++ .secrets.baseline | 127 +++++++++++ README.md | 1 - pyproject.toml | 179 +++++++++++++++ realtime_over_websockets/__init__.py | 0 realtime_over_websockets/main.py | 79 +++++++ .../website_files/static/Audio.js | 209 ++++++++++++++++++ .../website_files/static/main.js | 6 + .../website_files/templates/chat.html | 23 ++ scripts/lint-pre-commit.sh | 31 +++ scripts/lint.sh | 11 + scripts/static-analysis.sh | 11 + scripts/static-pre-commit.sh | 32 +++ 15 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 .codespell-whitelist.txt create mode 100644 .env.example create mode 100644 .pre-commit-config.yaml create mode 100644 .secrets.baseline create mode 100644 pyproject.toml create mode 100644 realtime_over_websockets/__init__.py create mode 100644 realtime_over_websockets/main.py create mode 100644 realtime_over_websockets/website_files/static/Audio.js create mode 100644 realtime_over_websockets/website_files/static/main.js create mode 100644 realtime_over_websockets/website_files/templates/chat.html create mode 100755 scripts/lint-pre-commit.sh create mode 100755 scripts/lint.sh create mode 100755 scripts/static-analysis.sh create mode 100755 scripts/static-pre-commit.sh diff --git a/.codespell-whitelist.txt b/.codespell-whitelist.txt new file mode 100644 index 0000000..e69de29 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..18678ba --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +OPENAI_API_KEY = sk-*************** diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..cb627a0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for 
more hooks +default_stages: [pre-commit, pre-merge-commit] +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + stages: [pre-commit, pre-merge-commit, manual] + - id: end-of-file-fixer + stages: [pre-commit, pre-merge-commit, manual] + - id: check-yaml + stages: [pre-commit, pre-merge-commit, manual] + - id: check-added-large-files + stages: [pre-commit, pre-merge-commit, manual] + +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + stages: [pre-commit, pre-merge-commit, manual] + args: [--ignore-words=.codespell-whitelist.txt] + +- repo: local + hooks: + - id: lint + name: Linter + stages: [pre-commit, pre-merge-commit, manual] + entry: "scripts/lint-pre-commit.sh" + language: python + types: [python] + require_serial: true + verbose: true + +- repo: local + hooks: + - id: static-analysis + name: Static analysis + entry: "scripts/static-pre-commit.sh" + language: python + types: [python] + require_serial: true + verbose: true + +- repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] + exclude: package.lock.json + stages: [pre-commit, pre-merge-commit, manual] diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000..1e8c51b --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,127 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + 
"name": "IPPublicDetector" + }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + } + ], + "results": {}, + "generated_at": "2025-01-03T09:53:36Z" +} diff --git a/README.md b/README.md index c1b477b..38fe612 100644 --- a/README.md +++ b/README.md @@ -1,2 +1 @@ # RealtimeAgent-WebSocketAudioAdapter -Basic demo of AG2 RealtimeAgent communication over WebSocketAudioAdapter diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e0bed15 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,179 @@ +[project] +name = "realtime-over-websockets" +description = "Demo project showcasing the 
RealtimeAgent communication over WebSockets" +version = "0.1.0" +readme = "README.md" +authors = [ + { name = "Tvrtko Sternak", email = "sternakt@gmail.com" }, +] + +requires-python = ">=3.9" + +dependencies = [ + "ag2==0.6.1", + "fastapi==0.115.0", + "uvicorn==0.30.6", + "jinja2==3.1.4", +] + +[project.optional-dependencies] + +# dev dependencies + +type = [ + "mypy==1.14.0", +] + +lint = [ + "ruff==0.8.4", + "bandit==1.8.0", + "semgrep==1.101.0", + "codespell==2.3.0", +] + +testing = [] + +dev = [ + "realtime-over-websockets[type,lint,testing]", + "pre-commit==4.0.1", + "detect-secrets==1.5.0", +] + +[tool.mypy] +files = ["realtime_over_websockets"] +strict = true +python_version = "3.9" +ignore_missing_imports = true +install_types = true +non_interactive = true +plugins = ["pydantic.mypy"] + +# from https://blog.wolt.com/engineering/2021/09/30/professional-grade-mypy-configuration/ +disallow_untyped_defs = true +no_implicit_optional = true +check_untyped_defs = true +warn_return_any = true +show_error_codes = true +warn_unused_ignores = true + +disallow_incomplete_defs = true +disallow_untyped_decorators = true +disallow_any_unimported = false + +[tool.ruff] +fix = true +line-length = 88 +target-version = "py38" +include = [ + "realtime_over_websockets/**/*.py", + "tests/**/*.py", + "pyproject.toml", +] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors https://docs.astral.sh/ruff/rules/#error-e + "W", # pycodestyle warnings https://docs.astral.sh/ruff/rules/#warning-w + "C90", # mccabe https://docs.astral.sh/ruff/rules/#mccabe-c90 + "N", # pep8-naming https://docs.astral.sh/ruff/rules/#pep8-naming-n + "D", # pydocstyle https://docs.astral.sh/ruff/rules/#pydocstyle-d + "I", # isort https://docs.astral.sh/ruff/rules/#isort-i + "F", # pyflakes https://docs.astral.sh/ruff/rules/#pyflakes-f + "ASYNC", # flake8-async https://docs.astral.sh/ruff/rules/#flake8-async-async + "C4", # flake8-comprehensions 
https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "B", # flake8-bugbear https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "Q", # flake8-quotes https://docs.astral.sh/ruff/rules/#flake8-quotes-q + "T20", # flake8-print https://docs.astral.sh/ruff/rules/#flake8-print-t20 + "SIM", # flake8-simplify https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "PT", # flake8-pytest-style https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt + "PTH", # flake8-use-pathlib https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth + "TCH", # flake8-type-checking https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch + "RUF", # Ruff-specific rules https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf + "PERF", # Perflint https://docs.astral.sh/ruff/rules/#perflint-perf + "UP", # pyupgrade https://docs.astral.sh/ruff/rules/#pyupgrade-up +] + +ignore = [ + "ASYNC109", # own timeout implementation + + "E501", # line too long, handled by formatter later + "C901", # too complex + + # todo pep8-naming + "N817", # CamelCase `*` imported as acronym `*` + "N815", # Variable `*` in class scope should not be mixedCase + "N803", # Argument name `expandMessageExamples` should be lowercase + + # todo pydocstyle + "D100", # missing docstring in public module + "D101", + "D102", + "D103", + "D104", # missing docstring in public package + "D105", # missing docstring in magic methods + "D106", # missing docstring in public nested class + "D107", # missing docstring in __init__ +] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = [ + "D101", # docstrings + "D102", + "D103", + "PLR2004", # magic-value-comparison + "S101", # use assert +] + + +[tool.ruff.lint.isort] +case-sensitive = true + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.flake8-bugbear] + +[tool.pytest.ini_options] +minversion = "7.0" +addopts = "-q -m 'not slow'" +testpaths = ["tests"] +markers = ["slow", "all"] 
+asyncio_default_fixture_loop_scope = "function" + +[tool.coverage.run] +parallel = true +branch = true +concurrency = ["multiprocessing", "thread"] +source = ["docs/docs_src", "examples", "faststream", "tests"] +context = '${CONTEXT}' +omit = ["**/__init__.py", "tests/mypy/*"] + +[tool.coverage.report] +show_missing = true +skip_empty = true +exclude_also = [ + "if __name__ == .__main__.:", + "self.logger", + "def __repr__", + "lambda: None", + "from .*", + "import .*", + '@(abc\.)?abstractmethod', + "raise NotImplementedError", + 'raise AssertionError', + 'logger\..*', + "pass", + '\.\.\.', +] +omit = [ + '*/__about__.py', +] + +[tool.bandit] + +[tool.codespell] +skip = "./venv*,./docs/site/*,./htmlcov" +ignore-words = ".codespell-whitelist.txt" diff --git a/realtime_over_websockets/__init__.py b/realtime_over_websockets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/realtime_over_websockets/main.py b/realtime_over_websockets/main.py new file mode 100644 index 0000000..d9434d1 --- /dev/null +++ b/realtime_over_websockets/main.py @@ -0,0 +1,79 @@ +import os +from logging import getLogger +from pathlib import Path +from typing import Annotated + +from autogen.agentchat.realtime_agent import RealtimeAgent, WebSocketAudioAdapter +from dotenv import load_dotenv +from fastapi import FastAPI, Request, WebSocket +from fastapi.responses import HTMLResponse, JSONResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates + +load_dotenv() + +realtime_llm_config = { + "timeout": 600, + "config_list": [ + { + "model": "gpt-4o-mini-realtime-preview", + "api_key": os.getenv("OPENAI_API_KEY"), + "tags": ["gpt-4o-mini-realtime", "realtime"], + }, + ], + "temperature": 0.8, +} + +PORT = 5050 + +app = FastAPI() + + +@app.get("/", response_class=JSONResponse) +async def index_page() -> dict[str, str]: + return {"message": "WebSocket Audio Stream Server is running!"} + + +website_files_path = Path(__file__).parent / 
"website_files" + +app.mount( + "/static", StaticFiles(directory=website_files_path / "static"), name="static" +) + +templates = Jinja2Templates(directory=website_files_path / "templates") + + +@app.get("/start-chat/", response_class=HTMLResponse) +async def start_chat(request: Request) -> HTMLResponse: + """Endpoint to return the HTML page for audio chat.""" + port = PORT # Server port (the PORT constant) passed to the template; not the client's port + return templates.TemplateResponse("chat.html", {"request": request, "port": port}) + + +@app.websocket("/media-stream") +async def handle_media_stream(websocket: WebSocket) -> None: + """Handle WebSocket connections providing audio stream and OpenAI.""" + await websocket.accept() + + logger = getLogger("uvicorn.error") + + audio_adapter = WebSocketAudioAdapter(websocket, logger=logger) + realtime_agent = RealtimeAgent( + name="Weather Bot", + system_message="Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying 'How can I help you'?", + llm_config=realtime_llm_config, + audio_adapter=audio_adapter, + logger=logger, + ) + + @realtime_agent.register_realtime_function( # type: ignore [misc] + name="get_weather", description="Get the current weather" + ) + def get_weather(location: Annotated[str, "city"]) -> str: + return ( + "The weather is cloudy." + if location == "Seattle" + else "The weather is sunny." 
+ ) + + await realtime_agent.run() diff --git a/realtime_over_websockets/website_files/static/Audio.js b/realtime_over_websockets/website_files/static/Audio.js new file mode 100644 index 0000000..945456a --- /dev/null +++ b/realtime_over_websockets/website_files/static/Audio.js @@ -0,0 +1,209 @@ +// Audio.js + +export class Audio { + constructor(webSocketUrl) { + this.webSocketUrl = webSocketUrl; + this.socket = null; + // audio out + this.outAudioContext = null; + this.sourceNode = null; + this.bufferQueue = []; // Queue to store audio buffers + this.isPlaying = false; // Flag to check if audio is playing + // audio in + this.inAudioContext = null; + this.processorNode = null; + this.stream = null; + this.bufferSize = 8192; // Define the buffer size for capturing chunks + } + + // Initialize WebSocket and start receiving audio data + async start() { + try { + // Initialize WebSocket connection + this.socket = new WebSocket(this.webSocketUrl); + + this.socket.onopen = () => { + console.log("WebSocket connected."); + const sessionStarted = { + event: "start", + start: { + streamSid: crypto.randomUUID(), + } + } + this.socket.send(JSON.stringify(sessionStarted)) + console.log("sent session start") + }; + + this.socket.onclose = () => { + console.log("WebSocket disconnected."); + }; + + this.socket.onmessage = async (event) => { + console.log("Received web socket message") + const message = JSON.parse(event.data) + if (message.event == "media") { + const bufferString = atob(message.media.payload); // Decode base64 to binary string + const byteArray = new Uint8Array(bufferString.length); + for (let i = 0; i < bufferString.length; i++) { + byteArray[i] = bufferString.charCodeAt(i); //Create a byte array + } + //const payload = base64.decode(message.media.payload) + // Ensure the data is an ArrayBuffer, if it's a Blob, convert it + //const pcmData = event.data instanceof ArrayBuffer ? 
event.data : await event.data.arrayBuffer(); + // + + this.queuePcmData(byteArray.buffer); // Push the received data into the buffer queue + if (!this.isPlaying) { + this.playFromQueue(); // Start playing if not already playing + } + } + }; + this.outAudioContext = new (window.AudioContext || window.webkitAudioContext)(); + console.log("Audio player initialized."); + + // audio in + // Get user media (microphone access) + + const stream = await navigator.mediaDevices.getUserMedia({ audio: { sampleRate:24000} }); + this.stream = stream; + this.inAudioContext = new AudioContext({ sampleRate: 24000 }); + + // Create an AudioNode to capture the microphone stream + const sourceNode = this.inAudioContext.createMediaStreamSource(stream); + + // Create a ScriptProcessorNode (or AudioWorkletProcessor for better performance) + this.processorNode = this.inAudioContext.createScriptProcessor(this.bufferSize, 1, 1); + + // Process audio data when available + this.processorNode.onaudioprocess = (event) => { + const inputBuffer = event.inputBuffer; + + // Extract PCM 16-bit data from input buffer (mono channel) + const audioData = this.extractPcm16Data(inputBuffer); + const byteArray = new Uint8Array(audioData); // Create a Uint8Array view + const bufferString = String.fromCharCode(...byteArray); // convert each byte of the buffer to a character + const audioBase64String = btoa(bufferString); // Apply base64 + // Send the PCM data over the WebSocket + if (this.socket.readyState === WebSocket.OPEN) { + const audioMessage = { + 'event': "media", + 'media': { + 'timestamp': Date.now(), + 'payload': audioBase64String + } + } + this.socket.send(JSON.stringify(audioMessage)); + } + }; + + // Connect the source node to the processor node and the processor node to the destination (speakers) + sourceNode.connect(this.processorNode); + this.processorNode.connect(this.inAudioContext.destination); + console.log("Audio capture started."); + } catch (err) { + console.error("Error initializing 
audio player:", err); + } + } + + // Stop receiving and playing audio + stop() { + this.stop_out() + this.stop_in() + } + + stop_out() { + if (this.socket) { + this.socket.close(); + } + if (this.outAudioContext) { + this.outAudioContext.close(); + } + console.log("Audio player stopped."); + } + + stop_in() { + if (this.processorNode) { + this.processorNode.disconnect(); + } + if (this.inAudioContext) { + this.inAudioContext.close(); + } + if (this.socket) { + this.socket.close(); + } + if (this.stream) { + this.stream.getTracks().forEach(track => track.stop()); + } + console.log("Audio capture stopped."); + } + + // Queue PCM data for later playback + queuePcmData(pcmData) { + this.bufferQueue.push(pcmData); + } + + // Play audio from the queue + async playFromQueue() { + if (this.bufferQueue.length === 0) { + this.isPlaying = false; // No more data to play + return; + } + + this.isPlaying = true; + const pcmData = this.bufferQueue.shift(); // Get the next chunk from the queue + + // Convert PCM 16-bit data to ArrayBuffer + const audioBuffer = await this.decodePcm16Data(pcmData); + + // Create an audio source and play it + const source = this.outAudioContext.createBufferSource(); + source.buffer = audioBuffer; + source.connect(this.outAudioContext.destination); + source.onended = () => { + // Play the next chunk after the current one ends + this.playFromQueue(); + }; + source.start(); + } + + // Decode PCM 16-bit data into AudioBuffer + async decodePcm16Data(pcmData) { + const audioData = new Float32Array(pcmData.byteLength / 2); + + // Convert PCM 16-bit to Float32Array + const dataView = new DataView(pcmData); + for (let i = 0; i < audioData.length; i++) { + const pcm16 = dataView.getInt16(i * 2, true); // true means little-endian + audioData[i] = pcm16 / 32768; // Convert to normalized float (-1 to 1) + } + + // Create an audio buffer from the Float32Array + const audioBuffer = this.outAudioContext.createBuffer(1, audioData.length, 24000); + 
audioBuffer.getChannelData(0).set(audioData); + + return audioBuffer; + } + + // Convert audio buffer to PCM 16-bit data + extractPcm16Data(buffer) { + const sampleRate = buffer.sampleRate; + const length = buffer.length; + const pcmData = new Int16Array(length); + + // Convert the float samples to PCM 16-bit (scaled between -32768 and 32767) + for (let i = 0; i < length; i++) { + pcmData[i] = Math.max(-32768, Math.min(32767, buffer.getChannelData(0)[i] * 32767)); + } + + // Convert Int16Array to a binary buffer (ArrayBuffer) + const pcmBuffer = new ArrayBuffer(pcmData.length * 2); // 2 bytes per sample + const pcmView = new DataView(pcmBuffer); + + for (let i = 0; i < pcmData.length; i++) { + pcmView.setInt16(i * 2, pcmData[i], true); // true means little-endian + } + + return pcmBuffer; + } + + } diff --git a/realtime_over_websockets/website_files/static/main.js b/realtime_over_websockets/website_files/static/main.js new file mode 100644 index 0000000..14b505e --- /dev/null +++ b/realtime_over_websockets/website_files/static/main.js @@ -0,0 +1,6 @@ +import { Audio } from './Audio.js'; + +// Create an instance of AudioPlayer with the WebSocket URL +const audio = new Audio(socketUrl); +// Start receiving and playing audio +audio.start(); diff --git a/realtime_over_websockets/website_files/templates/chat.html b/realtime_over_websockets/website_files/templates/chat.html new file mode 100644 index 0000000..2ee46ea --- /dev/null +++ b/realtime_over_websockets/website_files/templates/chat.html @@ -0,0 +1,23 @@ + + + + + + Audio Chat + + + + + + +

Audio Chat

+

Ensure microphone and speaker access is enabled.

+ + diff --git a/scripts/lint-pre-commit.sh b/scripts/lint-pre-commit.sh new file mode 100755 index 0000000..9b4c7c7 --- /dev/null +++ b/scripts/lint-pre-commit.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +# from: https://jaredkhan.com/blog/mypy-pre-commit + +# A script for running mypy, +# with all its dependencies installed. + +set -o errexit + +# Change directory to the project root directory. +cd "$(dirname "$0")"/.. + +# Install the dependencies into the mypy env. +# Note that this can take seconds to run. +# In my case, I need to use a custom index URL. +# Avoid pip spending time quietly retrying since +# likely cause of failure is lack of VPN connection. +pip install --editable ".[dev]" \ + --retries 1 \ + --no-input \ + --quiet + +# Run on all files, +# ignoring the paths passed to this script, +# so as not to miss type errors. +# My repo makes use of namespace packages. +# Use the namespace-packages flag +# and specify the package to run on explicitly. +# Note that we do not use --ignore-missing-imports, +# as this can give us false confidence in our results. +./scripts/lint.sh diff --git a/scripts/lint.sh b/scripts/lint.sh new file mode 100755 index 0000000..e9d341a --- /dev/null +++ b/scripts/lint.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +echo "Running ruff linter (isort, flake, pyupgrade, etc. replacement)..." +ruff check --exit-non-zero-on-fix + +echo "Running ruff formatter (black replacement)..." +ruff format + +echo "Running codespell to find typos..." +codespell diff --git a/scripts/static-analysis.sh b/scripts/static-analysis.sh new file mode 100755 index 0000000..4edd6f1 --- /dev/null +++ b/scripts/static-analysis.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +echo "Running mypy..." +mypy + +echo "Running bandit..." +bandit -c pyproject.toml -r realtime_over_websockets + +echo "Running semgrep..." 
+semgrep scan --config auto --error diff --git a/scripts/static-pre-commit.sh b/scripts/static-pre-commit.sh new file mode 100755 index 0000000..d984100 --- /dev/null +++ b/scripts/static-pre-commit.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +# taken from: https://jaredkhan.com/blog/mypy-pre-commit + +# A script for running mypy, +# with all its dependencies installed. + +set -o errexit + +# Change directory to the project root directory. +cd "$(dirname "$0")"/.. + +# Install the dependencies into the mypy env. +# Note that this can take seconds to run. +# In my case, I need to use a custom index URL. +# Avoid pip spending time quietly retrying since +# likely cause of failure is lack of VPN connection. +pip install --editable ".[dev]" \ + --retries 1 \ + --no-input \ + --quiet + +# Run on all files, +# ignoring the paths passed to this script, +# so as not to miss type errors. +# My repo makes use of namespace packages. +# Use the namespace-packages flag +# and specify the package to run on explicitly. +# Note that we do not use --ignore-missing-imports, +# as this can give us false confidence in our results. +# mypy faststream +./scripts/static-analysis.sh From 8bf2ee92f3cf4e6c3fa665beb6dfc7591df4dd94 Mon Sep 17 00:00:00 2001 From: Tvrtko Sternak Date: Fri, 3 Jan 2025 11:24:16 +0100 Subject: [PATCH 2/6] Cleanup README --- README.md | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 38fe612..a08f3b5 100644 --- a/README.md +++ b/README.md @@ -1 +1,49 @@ -# RealtimeAgent-WebSocketAudioAdapter + +# **RealtimeAgent over WebSockets** + +This project demonstrates how to create a voice assistant using Python, FastAPI, WebSockets, and an AG2 RealtimeAgent. The application streams audio from a browser to a FastAPI server and enables real-time voice communication with the RealtimeAgent. 
+ +## **Key Features** +- **WebSocket Audio Streaming**: Direct real-time audio streaming between the browser and server. +- **FastAPI Integration**: A lightweight Python backend for handling WebSocket traffic. + +## **Prerequisites** + +Before you begin, ensure you have the following: +- **Python 3.9+**: The project was tested with `3.9`. Download [here](https://www.python.org/downloads/). +- **API Access**: Access to the OpenAI API (credentials required). + +## **Local Setup** + +Follow these steps to set up the project locally: + +### **1. Clone the Repository** +```bash +git clone https://github.com/sternakt/RealtimeAgent-WebSocketAudioAdapter.git +cd RealtimeAgent-WebSocketAudioAdapter +``` + +### **2. Set Up Environment Variables** +Create a `.env` file based on the provided `.env.example`: +```bash +cp .env.example .env +``` +Set `OPENAI_API_KEY` in the `.env` file to your OpenAI API key. + +### **3. Install Dependencies** +Install the required Python packages using `pip`: +```bash +pip install . +``` + +### **4. Start the Server** +Run the application with Uvicorn: +```bash +uvicorn realtime_over_websockets.main:app --port 5050 +``` + +## **Test the App** +With the server running, open the client application at http://localhost:5050/start-chat/ in your browser and allow microphone access. Speak into your microphone, and the AI assistant will respond in real time. + +## **License** +This project is licensed under the [MIT License](LICENSE). 
From 9fa18b8a7f3dbb623d28c29811395e7f7018de30 Mon Sep 17 00:00:00 2001 From: Tvrtko Sternak Date: Fri, 3 Jan 2025 11:31:08 +0100 Subject: [PATCH 3/6] Add pre-commit workflow --- .github/workflows/pre-commit.yaml | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .github/workflows/pre-commit.yaml diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml new file mode 100644 index 0000000..024c032 --- /dev/null +++ b/.github/workflows/pre-commit.yaml @@ -0,0 +1,32 @@ +name: Pre-commit Checks + +on: + push: + branches: + - main + - '**' # Runs on all branches + pull_request: + +jobs: + pre-commit: + name: Run Pre-commit Hooks + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ".[dev]" + + - name: Run pre-commit + run: | + pre-commit install + pre-commit run --all-files From 31aba2ac5af3acbf377464657a16f48257b56896 Mon Sep 17 00:00:00 2001 From: Tvrtko Sternak Date: Fri, 3 Jan 2025 11:35:26 +0100 Subject: [PATCH 4/6] Update pre-commit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb627a0..968ac40 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_stages: [pre-commit, pre-merge-commit] repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: trailing-whitespace stages: [pre-commit, pre-merge-commit, manual] From f8dae11b2b0fbd8f0afb22281670ae607835159b Mon Sep 17 00:00:00 2001 From: Tvrtko Sternak Date: Fri, 3 Jan 2025 11:43:33 +0100 Subject: [PATCH 5/6] Update pre-commit --- .github/workflows/pre-commit.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pre-commit.yaml 
b/.github/workflows/pre-commit.yaml index 024c032..f247ff1 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -5,7 +5,6 @@ on: branches: - main - '**' # Runs on all branches - pull_request: jobs: pre-commit: From 9339f5ed7a7b0f198100d619eed6767e117174b3 Mon Sep 17 00:00:00 2001 From: Tvrtko Sternak Date: Fri, 3 Jan 2025 12:34:15 +0100 Subject: [PATCH 6/6] Add pytest to project --- .github/workflows/test.yaml | 30 ++++++++++++++++++++++++++++++ pyproject.toml | 5 ++++- tests/test_app.py | 11 +++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test.yaml create mode 100644 tests/test_app.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..d6d30db --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,30 @@ +name: Run Pytest + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Run pytest + run: pytest tests/ diff --git a/pyproject.toml b/pyproject.toml index e0bed15..5441a10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,10 @@ lint = [ "codespell==2.3.0", ] -testing = [] +testing = [ + "pytest==8.3.4", + "pytest-asyncio==0.25.0", +] dev = [ "realtime-over-websockets[type,lint,testing]", diff --git a/tests/test_app.py b/tests/test_app.py new file mode 100644 index 0000000..069739d --- /dev/null +++ b/tests/test_app.py @@ -0,0 +1,11 @@ +from fastapi.testclient import TestClient + +from realtime_over_websockets.main import app + +client = TestClient(app) + + +def test_index_page(): + response = client.get("/") + assert response.status_code == 200 + assert response.json() == {"message": 
"WebSocket Audio Stream Server is running!"}