From 5ea89a92f219e6455f38540d81228326f9e19b37 Mon Sep 17 00:00:00 2001
From: Tvrtko Sternak <sternakt@gmail.com>
Date: Fri, 3 Jan 2025 11:13:37 +0100
Subject: [PATCH 1/6] Init demo project

---
 .codespell-whitelist.txt                      |   0
 .env.example                                  |   1 +
 .pre-commit-config.yaml                       |  51 +++++
 .secrets.baseline                             | 127 +++++++++++
 README.md                                     |   1 -
 pyproject.toml                                | 179 +++++++++++++++
 realtime_over_websockets/__init__.py          |   0
 realtime_over_websockets/main.py              |  79 +++++++
 .../website_files/static/Audio.js             | 209 ++++++++++++++++++
 .../website_files/static/main.js              |   6 +
 .../website_files/templates/chat.html         |  23 ++
 scripts/lint-pre-commit.sh                    |  31 +++
 scripts/lint.sh                               |  11 +
 scripts/static-analysis.sh                    |  11 +
 scripts/static-pre-commit.sh                  |  32 +++
 15 files changed, 760 insertions(+), 1 deletion(-)
 create mode 100644 .codespell-whitelist.txt
 create mode 100644 .env.example
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 .secrets.baseline
 create mode 100644 pyproject.toml
 create mode 100644 realtime_over_websockets/__init__.py
 create mode 100644 realtime_over_websockets/main.py
 create mode 100644 realtime_over_websockets/website_files/static/Audio.js
 create mode 100644 realtime_over_websockets/website_files/static/main.js
 create mode 100644 realtime_over_websockets/website_files/templates/chat.html
 create mode 100755 scripts/lint-pre-commit.sh
 create mode 100755 scripts/lint.sh
 create mode 100755 scripts/static-analysis.sh
 create mode 100755 scripts/static-pre-commit.sh

diff --git a/.codespell-whitelist.txt b/.codespell-whitelist.txt
new file mode 100644
index 0000000..e69de29
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..18678ba
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+OPENAI_API_KEY = sk-***************
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..cb627a0
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,51 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+default_stages: [pre-commit, pre-merge-commit]
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+    -   id: trailing-whitespace
+        stages: [pre-commit, pre-merge-commit, manual]
+    -   id: end-of-file-fixer
+        stages: [pre-commit, pre-merge-commit, manual]
+    -   id: check-yaml
+        stages: [pre-commit, pre-merge-commit, manual]
+    -   id: check-added-large-files
+        stages: [pre-commit, pre-merge-commit, manual]
+
+-   repo: https://github.com/codespell-project/codespell
+    rev: v2.3.0
+    hooks:
+    -   id: codespell
+        stages: [pre-commit, pre-merge-commit, manual]
+        args: [--ignore-words=.codespell-whitelist.txt]
+
+-   repo: local
+    hooks:
+    -   id: lint
+        name: Linter
+        stages: [pre-commit, pre-merge-commit, manual]
+        entry: "scripts/lint-pre-commit.sh"
+        language: python
+        types: [python]
+        require_serial: true
+        verbose: true
+
+-   repo: local
+    hooks:
+    -   id: static-analysis
+        name: Static analysis
+        entry: "scripts/static-pre-commit.sh"
+        language: python
+        types: [python]
+        require_serial: true
+        verbose: true
+
+-   repo: https://github.com/Yelp/detect-secrets
+    rev: v1.5.0
+    hooks:
+    -   id: detect-secrets
+        args: ['--baseline', '.secrets.baseline']
+        exclude: package.lock.json
+        stages: [pre-commit, pre-merge-commit, manual]
diff --git a/.secrets.baseline b/.secrets.baseline
new file mode 100644
index 0000000..1e8c51b
--- /dev/null
+++ b/.secrets.baseline
@@ -0,0 +1,127 @@
+{
+  "version": "1.5.0",
+  "plugins_used": [
+    {
+      "name": "ArtifactoryDetector"
+    },
+    {
+      "name": "AWSKeyDetector"
+    },
+    {
+      "name": "AzureStorageKeyDetector"
+    },
+    {
+      "name": "Base64HighEntropyString",
+      "limit": 4.5
+    },
+    {
+      "name": "BasicAuthDetector"
+    },
+    {
+      "name": "CloudantDetector"
+    },
+    {
+      "name": "DiscordBotTokenDetector"
+    },
+    {
+      "name": "GitHubTokenDetector"
+    },
+    {
+      "name": "GitLabTokenDetector"
+    },
+    {
+      "name": "HexHighEntropyString",
+      "limit": 3.0
+    },
+    {
+      "name": "IbmCloudIamDetector"
+    },
+    {
+      "name": "IbmCosHmacDetector"
+    },
+    {
+      "name": "IPPublicDetector"
+    },
+    {
+      "name": "JwtTokenDetector"
+    },
+    {
+      "name": "KeywordDetector",
+      "keyword_exclude": ""
+    },
+    {
+      "name": "MailchimpDetector"
+    },
+    {
+      "name": "NpmDetector"
+    },
+    {
+      "name": "OpenAIDetector"
+    },
+    {
+      "name": "PrivateKeyDetector"
+    },
+    {
+      "name": "PypiTokenDetector"
+    },
+    {
+      "name": "SendGridDetector"
+    },
+    {
+      "name": "SlackDetector"
+    },
+    {
+      "name": "SoftlayerDetector"
+    },
+    {
+      "name": "SquareOAuthDetector"
+    },
+    {
+      "name": "StripeDetector"
+    },
+    {
+      "name": "TelegramBotTokenDetector"
+    },
+    {
+      "name": "TwilioKeyDetector"
+    }
+  ],
+  "filters_used": [
+    {
+      "path": "detect_secrets.filters.allowlist.is_line_allowlisted"
+    },
+    {
+      "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
+      "min_level": 2
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_indirect_reference"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_likely_id_string"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_lock_file"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_potential_uuid"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_sequential_string"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_swagger_file"
+    },
+    {
+      "path": "detect_secrets.filters.heuristic.is_templated_secret"
+    }
+  ],
+  "results": {},
+  "generated_at": "2025-01-03T09:53:36Z"
+}
diff --git a/README.md b/README.md
index c1b477b..38fe612 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1 @@
 # RealtimeAgent-WebSocketAudioAdapter
-Basic demo of AG2 RealtimeAgent communication over WebSocketAudioAdapter
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..e0bed15
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,179 @@
+[project]
+name = "realtime-over-websockets"
+description = "Demo project showcasing the RealtimeAgent communication over WebSockets"
+version = "0.1.0"
+readme = "README.md"
+authors = [
+    { name = "Tvrtko Sternak", email = "sternakt@gmail.com" },
+]
+
+requires-python = ">=3.9"
+
+dependencies = [
+    "ag2==0.6.1",
+    "fastapi==0.115.0",
+    "uvicorn==0.30.6",
+    "jinja2==3.1.4",
+]
+
+[project.optional-dependencies]
+
+# dev dependencies
+
+type = [
+    "mypy==1.14.0",
+]
+
+lint = [
+    "ruff==0.8.4",
+    "bandit==1.8.0",
+    "semgrep==1.101.0",
+    "codespell==2.3.0",
+]
+
+testing = []
+
+dev = [
+    "realtime-over-websockets[type,lint,testing]",
+    "pre-commit==4.0.1",
+    "detect-secrets==1.5.0",
+]
+
+[tool.mypy]
+files = ["realtime_over_websockets"]
+strict = true
+python_version = "3.9"
+ignore_missing_imports = true
+install_types = true
+non_interactive = true
+plugins = ["pydantic.mypy"]
+
+# from https://blog.wolt.com/engineering/2021/09/30/professional-grade-mypy-configuration/
+disallow_untyped_defs = true
+no_implicit_optional = true
+check_untyped_defs = true
+warn_return_any = true
+show_error_codes = true
+warn_unused_ignores = true
+
+disallow_incomplete_defs = true
+disallow_untyped_decorators = true
+disallow_any_unimported = false
+
+[tool.ruff]
+fix = true
+line-length = 88
+target-version = "py39"  # lowest version allowed by requires-python (>=3.9)
+include = [
+    "realtime_over_websockets/**/*.py",
+    "tests/**/*.py",
+    "pyproject.toml",
+]
+
+[tool.ruff.lint]
+select = [
+    "E",     # pycodestyle errors     https://docs.astral.sh/ruff/rules/#error-e
+    "W",     # pycodestyle warnings   https://docs.astral.sh/ruff/rules/#warning-w
+    "C90",   # mccabe                 https://docs.astral.sh/ruff/rules/#mccabe-c90
+    "N",     # pep8-naming            https://docs.astral.sh/ruff/rules/#pep8-naming-n
+    "D",     # pydocstyle             https://docs.astral.sh/ruff/rules/#pydocstyle-d
+    "I",     # isort                  https://docs.astral.sh/ruff/rules/#isort-i
+    "F",     # pyflakes               https://docs.astral.sh/ruff/rules/#pyflakes-f
+    "ASYNC", # flake8-async           https://docs.astral.sh/ruff/rules/#flake8-async-async
+    "C4",    # flake8-comprehensions  https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4
+    "B",     # flake8-bugbear         https://docs.astral.sh/ruff/rules/#flake8-bugbear-b
+    "Q",     # flake8-quotes          https://docs.astral.sh/ruff/rules/#flake8-quotes-q
+    "T20",   # flake8-print           https://docs.astral.sh/ruff/rules/#flake8-print-t20
+    "SIM",   # flake8-simplify        https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
+    "PT",    # flake8-pytest-style    https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
+    "PTH",   # flake8-use-pathlib     https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
+    "TCH",   # flake8-type-checking   https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch
+    "RUF",   # Ruff-specific rules    https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
+    "PERF",  # Perflint               https://docs.astral.sh/ruff/rules/#perflint-perf
+    "UP",    # pyupgrade              https://docs.astral.sh/ruff/rules/#pyupgrade-up
+]
+
+ignore = [
+    "ASYNC109", # own timeout implementation
+
+    "E501",     # line too long, handled by formatter later
+    "C901",     # too complex
+
+    # todo pep8-naming
+    "N817",     # CamelCase `*` imported as acronym `*`
+    "N815",     # Variable `*` in class scope should not be mixedCase
+    "N803",     # Argument name `expandMessageExamples` should be lowercase
+
+    # todo pydocstyle
+    "D100",     # missing docstring in public module
+    "D101",
+    "D102",
+    "D103",
+    "D104",     # missing docstring in public package
+    "D105",     # missing docstring in magic methods
+    "D106",     # missing docstring in public nested class
+    "D107",     # missing docstring in __init__
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/**" = [
+    "D101",    # docstrings
+    "D102",
+    "D103",
+    "PLR2004", # magic-value-comparison
+    "S101",    # use assert
+]
+
+
+[tool.ruff.lint.isort]
+case-sensitive = true
+
+[tool.ruff.format]
+docstring-code-format = true
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.flake8-bugbear]
+
+[tool.pytest.ini_options]
+minversion = "7.0"
+addopts = "-q -m 'not slow'"
+testpaths = ["tests"]
+markers = ["slow", "all"]
+asyncio_default_fixture_loop_scope = "function"
+
+[tool.coverage.run]
+parallel = true
+branch = true
+concurrency = ["multiprocessing", "thread"]
+source = ["realtime_over_websockets", "tests"]  # this project's package and tests
+context = '${CONTEXT}'
+omit = ["**/__init__.py", "tests/mypy/*"]
+
+[tool.coverage.report]
+show_missing = true
+skip_empty = true
+exclude_also = [
+    "if __name__ == .__main__.:",
+    "self.logger",
+    "def __repr__",
+    "lambda: None",
+    "from .*",
+    "import .*",
+    '@(abc\.)?abstractmethod',
+    "raise NotImplementedError",
+    'raise AssertionError',
+    'logger\..*',
+    "pass",
+    '\.\.\.',
+]
+omit = [
+    '*/__about__.py',
+]
+
+[tool.bandit]
+
+[tool.codespell]
+skip = "./venv*,./docs/site/*,./htmlcov"
+ignore-words = ".codespell-whitelist.txt"
diff --git a/realtime_over_websockets/__init__.py b/realtime_over_websockets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/realtime_over_websockets/main.py b/realtime_over_websockets/main.py
new file mode 100644
index 0000000..d9434d1
--- /dev/null
+++ b/realtime_over_websockets/main.py
@@ -0,0 +1,79 @@
+import os
+from logging import getLogger
+from pathlib import Path
+from typing import Annotated
+
+from autogen.agentchat.realtime_agent import RealtimeAgent, WebSocketAudioAdapter
+from dotenv import load_dotenv
+from fastapi import FastAPI, Request, WebSocket
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+
+load_dotenv()  # load variables such as OPENAI_API_KEY from .env (see .env.example)
+
+realtime_llm_config = {
+    "timeout": 600,
+    "config_list": [
+        {
+            "model": "gpt-4o-mini-realtime-preview",
+            "api_key": os.getenv("OPENAI_API_KEY"),  # None if unset
+            "tags": ["gpt-4o-mini-realtime", "realtime"],
+        },
+    ],
+    "temperature": 0.8,
+}
+
+PORT = 5050  # passed to chat.html, which builds the browser's WebSocket URL from it
+
+app = FastAPI()
+
+
+@app.get("/", response_class=JSONResponse)
+async def index_page() -> dict[str, str]:  # fixed status message, no template
+    return {"message": "WebSocket Audio Stream Server is running!"}
+
+
+website_files_path = Path(__file__).parent / "website_files"
+# Files under website_files/static (Audio.js, main.js) are served at /static.
+app.mount(
+    "/static", StaticFiles(directory=website_files_path / "static"), name="static"
+)
+# chat.html is rendered from website_files/templates.
+templates = Jinja2Templates(directory=website_files_path / "templates")
+
+
+@app.get("/start-chat/", response_class=HTMLResponse)
+async def start_chat(request: Request) -> HTMLResponse:
+    """Render the audio chat page, passing it the port used for the WebSocket URL."""
+    context = {"request": request, "port": PORT}  # PORT is the module constant
+    return templates.TemplateResponse("chat.html", context)
+
+
+@app.websocket("/media-stream")
+async def handle_media_stream(websocket: WebSocket) -> None:
+    """Accept the socket, attach a RealtimeAgent to it, and run the agent."""
+    uvicorn_logger = getLogger("uvicorn.error")
+    await websocket.accept()
+
+    # The adapter wraps the accepted socket; the agent is handed the adapter.
+    adapter = WebSocketAudioAdapter(websocket, logger=uvicorn_logger)
+    agent = RealtimeAgent(
+        name="Weather Bot",
+        system_message="Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying 'How can I help you'?",
+        llm_config=realtime_llm_config,
+        audio_adapter=adapter,
+        logger=uvicorn_logger,
+    )
+
+    # Register get_weather with the agent under the given name and description.
+    @agent.register_realtime_function(  # type: ignore [misc]
+        name="get_weather", description="Get the current weather"
+    )
+    def get_weather(location: Annotated[str, "city"]) -> str:
+        # Canned demo answer: only "Seattle" is reported as cloudy.
+        if location == "Seattle":
+            return "The weather is cloudy."
+        return "The weather is sunny."
+
+    await agent.run()
diff --git a/realtime_over_websockets/website_files/static/Audio.js b/realtime_over_websockets/website_files/static/Audio.js
new file mode 100644
index 0000000..945456a
--- /dev/null
+++ b/realtime_over_websockets/website_files/static/Audio.js
@@ -0,0 +1,209 @@
+// Audio.js
+
+export class Audio {
+    constructor(webSocketUrl) {
+        this.socket = null;
+        this.webSocketUrl = webSocketUrl;
+        // playback side (received audio -> speakers)
+        this.isPlaying = false; // true while playFromQueue() is working through the queue
+        this.bufferQueue = [];  // received PCM chunks waiting to be played
+        this.sourceNode = null;
+        this.outAudioContext = null;
+        // capture side (microphone -> socket)
+        this.bufferSize = 8192;  // buffer size handed to createScriptProcessor()
+        this.stream = null;
+        this.processorNode = null;
+        this.inAudioContext = null;
+    }
+
+    // Initialize WebSocket and start receiving audio data
+    async start() {
+        try {
+            // Open the WebSocket connection and attach its handlers.
+            this.socket = new WebSocket(this.webSocketUrl);
+
+            this.socket.onopen = () => {
+                console.log("WebSocket connected.");
+                // Announce the session with a freshly generated stream id.
+                const sessionStarted = {
+                    event: "start",
+                    start: {
+                        streamSid: crypto.randomUUID(),
+                    },
+                };
+                this.socket.send(JSON.stringify(sessionStarted));
+                console.log("sent session start");
+            };
+
+            this.socket.onclose = () => {
+                console.log("WebSocket disconnected.");
+            };
+
+            this.socket.onmessage = async (event) => {
+                console.log("Received web socket message");
+                const message = JSON.parse(event.data);
+                // Only "media" events carry audio; anything else is ignored.
+                if (message.event !== "media") {
+                    return;
+                }
+                // Base64 -> binary string -> one byte per character code.
+                const bufferString = atob(message.media.payload);
+                const byteArray = new Uint8Array(bufferString.length);
+                for (let i = 0; i < bufferString.length; i++) {
+                    byteArray[i] = bufferString.charCodeAt(i);
+                }
+
+                // Queue the chunk; start playback unless it is already running.
+                this.queuePcmData(byteArray.buffer);
+                if (!this.isPlaying) {
+                    this.playFromQueue();
+                }
+            };
+            this.outAudioContext = new (window.AudioContext || window.webkitAudioContext)();
+            console.log("Audio player initialized.");
+
+            // audio in: request microphone access at a 24 kHz sample rate.
+            const stream = await navigator.mediaDevices.getUserMedia({ audio: { sampleRate: 24000 } });
+            this.stream = stream;
+            this.inAudioContext = new AudioContext({ sampleRate: 24000 });
+
+            // Source node that feeds the microphone stream into the audio graph.
+            const sourceNode = this.inAudioContext.createMediaStreamSource(stream);
+
+            // ScriptProcessorNode with one input and one output channel
+            // (an AudioWorklet would be the better-performing alternative).
+            this.processorNode = this.inAudioContext.createScriptProcessor(this.bufferSize, 1, 1);
+
+            // Called with each captured chunk of microphone audio.
+            this.processorNode.onaudioprocess = (event) => {
+                // Socket not open: skip the encoding work, nothing can be sent.
+                if (this.socket.readyState !== WebSocket.OPEN) {
+                    return;
+                }
+                // Float samples -> PCM 16-bit bytes -> binary string -> base64.
+                const audioData = this.extractPcm16Data(event.inputBuffer);
+                const byteArray = new Uint8Array(audioData);
+                const bufferString = String.fromCharCode(...byteArray);
+                const audioBase64String = btoa(bufferString);
+
+                const audioMessage = {
+                    event: "media",
+                    media: {
+                        timestamp: Date.now(),
+                        payload: audioBase64String,
+                    },
+                };
+                this.socket.send(JSON.stringify(audioMessage));
+            };
+
+            // Wire microphone source -> processor -> context destination.
+            sourceNode.connect(this.processorNode);
+            this.processorNode.connect(this.inAudioContext.destination);
+            console.log("Audio capture started.");
+        } catch (err) {
+            console.error("Error initializing audio player:", err);
+        }
+    }
+
+    // Stop receiving and playing audio
+    stop() {
+        this.stop_out()  // closes the WebSocket and the playback AudioContext
+        this.stop_in()   // disconnects the processor, closes the capture context, stops mic tracks
+    }
+
+    stop_out() {
+        // Close the transport first, then the playback context.
+        // Both are null until start() has assigned them, hence the optional calls.
+        this.socket?.close();
+        this.outAudioContext?.close();
+
+        // Logged whether or not anything was actually open.
+        console.log("Audio player stopped.");
+    }
+
+    stop_in() {
+        // Each resource below is null until start() has created it,
+        // so every teardown step is skipped when its target is missing.
+        this.processorNode?.disconnect();
+        this.inAudioContext?.close();
+        this.socket?.close();
+
+        // Stop every track of the captured microphone stream.
+        const tracks = this.stream ? this.stream.getTracks() : [];
+        for (const track of tracks) {
+            track.stop();
+        }
+
+        console.log("Audio capture stopped.");
+    }
+
+    // Queue PCM data for later playback
+    queuePcmData(pcmData) {
+        this.bufferQueue.push(pcmData);  // appended at the back; playFromQueue() shift()s from the front
+    }
+
+    // Play audio from the queue
+    async playFromQueue() {
+        // Queue drained: clear the flag so the next received chunk restarts playback.
+        if (this.bufferQueue.length === 0) {
+            this.isPlaying = false;
+            return;
+        }
+        this.isPlaying = true;
+
+        // Take the oldest chunk and decode its PCM 16-bit bytes into an AudioBuffer.
+        const chunk = this.bufferQueue.shift();
+        const decoded = await this.decodePcm16Data(chunk);
+
+        // Wrap it in a buffer source routed to the playback context's destination.
+        const player = this.outAudioContext.createBufferSource();
+        player.buffer = decoded;
+        player.connect(this.outAudioContext.destination);
+        // When this chunk finishes, call again to play whatever is queued next.
+        player.onended = () => {
+            this.playFromQueue();
+        };
+        player.start();
+    }
+
+    // Decode PCM 16-bit data into AudioBuffer
+    async decodePcm16Data(pcmData) {
+        // Each sample occupies two bytes of the incoming buffer.
+        const floats = new Float32Array(pcmData.byteLength / 2);
+        const reader = new DataView(pcmData);
+        // Read little-endian signed 16-bit values and scale them by 1/32768.
+        let byteOffset = 0;
+        for (let index = 0; index < floats.length; index += 1) {
+            floats[index] = reader.getInt16(byteOffset, true) / 32768;
+            byteOffset += 2;
+        }
+
+        // Copy the samples into a single-channel AudioBuffer created at 24000 Hz.
+        const decoded = this.outAudioContext.createBuffer(1, floats.length, 24000);
+        decoded.getChannelData(0).set(floats);
+        return decoded;
+    }
+
+    // Convert audio buffer to PCM 16-bit data
+    extractPcm16Data(buffer) {
+        // Fetch channel 0 once instead of calling getChannelData() per sample.
+        const samples = buffer.getChannelData(0);
+        const frameCount = buffer.length;
+
+        // Output: raw signed 16-bit PCM, little-endian, 2 bytes per sample.
+        const pcmBuffer = new ArrayBuffer(frameCount * 2);
+        const pcmView = new DataView(pcmBuffer);
+
+        for (let i = 0; i < frameCount; i++) {
+            // Scale the float sample to the 16-bit range and clamp it so
+            // out-of-range input saturates instead of wrapping around.
+            const scaled = samples[i] * 32767;
+            const clamped = Math.max(-32768, Math.min(32767, scaled));
+            // Truncate toward zero before storing the 16-bit value.
+            pcmView.setInt16(i * 2, Math.trunc(clamped), true);
+        }
+
+        return pcmBuffer;
+    }
+
+  }
diff --git a/realtime_over_websockets/website_files/static/main.js b/realtime_over_websockets/website_files/static/main.js
new file mode 100644
index 0000000..14b505e
--- /dev/null
+++ b/realtime_over_websockets/website_files/static/main.js
@@ -0,0 +1,6 @@
+import { Audio } from './Audio.js';
+
+// Create an Audio instance for the WebSocket URL (`socketUrl` is defined by chat.html)
+const audio = new Audio(socketUrl);
+// Open the socket, then set up playback of received audio and microphone capture
+audio.start();
diff --git a/realtime_over_websockets/website_files/templates/chat.html b/realtime_over_websockets/website_files/templates/chat.html
new file mode 100644
index 0000000..2ee46ea
--- /dev/null
+++ b/realtime_over_websockets/website_files/templates/chat.html
@@ -0,0 +1,23 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Audio Chat</title>
+    <script>
+        // Build the WebSocket URL from the port injected by the server; main.js reads socketUrl
+        const port = {{ port }};
+        const socketUrl = `ws://localhost:${port}/media-stream`;
+    </script>
+    <!--
+        main.js is an ES module: it imports the Audio class from
+        /static/Audio.js, builds it with the socketUrl defined above, and
+        starts capture and playback. No other script files are required.
+    -->
+    <script src="/static/main.js" type="module" defer></script>
+</head>
+<body>
+    <h1>Audio Chat</h1>
+    <p>Ensure microphone and speaker access is enabled.</p>
+</body>
+</html>
diff --git a/scripts/lint-pre-commit.sh b/scripts/lint-pre-commit.sh
new file mode 100755
index 0000000..9b4c7c7
--- /dev/null
+++ b/scripts/lint-pre-commit.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Adapted from: https://jaredkhan.com/blog/mypy-pre-commit
+
+# Pre-commit entry point for the "lint" hook: installs the project's
+# dev dependencies, then runs scripts/lint.sh.
+
+set -o errexit
+
+# Change directory to the project root directory.
+cd "$(dirname "$0")"/..
+
+# Install the project in editable mode together with its "dev" extra,
+# which pulls in ruff and codespell (the tools lint.sh invokes).
+# Note that this can take seconds to run.
+# --retries 1 keeps pip from spending time quietly retrying
+# when the package index cannot be reached.
+pip install --editable ".[dev]" \
+ --retries 1 \
+ --no-input \
+ --quiet
+
+# Run the linters over the whole project,
+# ignoring the paths passed to this script,
+# so that problems in files not touched by the commit are still reported.
+# lint.sh runs, in order:
+#   - ruff check (exits non-zero when it applied a fix),
+#   - ruff format,
+#   - codespell.
+# Ruff and codespell settings live in pyproject.toml.
+./scripts/lint.sh
diff --git a/scripts/lint.sh b/scripts/lint.sh
new file mode 100755
index 0000000..e9d341a
--- /dev/null
+++ b/scripts/lint.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+echo "Running ruff linter (isort, flake, pyupgrade, etc. replacement)..."
+ruff check --exit-non-zero-on-fix
+
+echo "Running ruff formatter (black replacement)..."
+ruff format
+
+echo "Running codespell to find typos..."
+codespell
diff --git a/scripts/static-analysis.sh b/scripts/static-analysis.sh
new file mode 100755
index 0000000..4edd6f1
--- /dev/null
+++ b/scripts/static-analysis.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -e
+
+echo "Running mypy..."
+mypy
+
+echo "Running bandit..."
+bandit -c pyproject.toml -r realtime_over_websockets
+
+echo "Running semgrep..."
+semgrep scan --config auto --error
diff --git a/scripts/static-pre-commit.sh b/scripts/static-pre-commit.sh
new file mode 100755
index 0000000..d984100
--- /dev/null
+++ b/scripts/static-pre-commit.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+# Adapted from: https://jaredkhan.com/blog/mypy-pre-commit
+
+# Pre-commit entry point for the "static-analysis" hook: installs the
+# project's dev dependencies, then runs scripts/static-analysis.sh.
+
+set -o errexit
+
+# Change directory to the project root directory.
+cd "$(dirname "$0")"/..
+
+# Install the project in editable mode together with its "dev" extra,
+# which pulls in mypy, bandit and semgrep (the tools static-analysis.sh invokes).
+# Note that this can take seconds to run.
+# --retries 1 keeps pip from spending time quietly retrying
+# when the package index cannot be reached.
+pip install --editable ".[dev]" \
+ --retries 1 \
+ --no-input \
+ --quiet
+
+# Run the analysers over the whole project,
+# ignoring the paths passed to this script,
+# so that problems in files not touched by the commit are still reported.
+# static-analysis.sh runs, in order:
+#   - mypy (configured under [tool.mypy] in pyproject.toml),
+#   - bandit over realtime_over_websockets,
+#   - semgrep scan with --config auto.
+# Note: pyproject.toml sets ignore_missing_imports = true for mypy,
+# so imports that cannot be resolved are not reported as errors.
+./scripts/static-analysis.sh

From 8bf2ee92f3cf4e6c3fa665beb6dfc7591df4dd94 Mon Sep 17 00:00:00 2001
From: Tvrtko Sternak <sternakt@gmail.com>
Date: Fri, 3 Jan 2025 11:24:16 +0100
Subject: [PATCH 2/6] Cleanup README

---
 README.md | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 38fe612..a08f3b5 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,49 @@
-# RealtimeAgent-WebSocketAudioAdapter
+
+# **RealtimeAgent over WebSockets**
+
+This project demonstrates how to create a voice assistant using Python, FastAPI, WebSockets, and an AG2 RealtimeAgent. The application streams audio from a browser to a FastAPI server and enables real-time voice communication with the RealtimeAgent.
+
+## **Key Features**
+- **WebSocket Audio Streaming**: Direct real-time audio streaming between the browser and server.
+- **FastAPI Integration**: A lightweight Python backend for handling WebSocket traffic.
+
+## **Prerequisites**
+
+Before you begin, ensure you have the following:
+- **Python 3.9+**: The project was tested with `3.9`. Download [here](https://www.python.org/downloads/).
+- **API Access**: Access to the OpenAI API (credentials required).
+
+## **Local Setup**
+
+Follow these steps to set up the project locally:
+
+### **1. Clone the Repository**
+```bash
+git clone https://github.com/sternakt/RealtimeAgent-WebSocketAudioAdapter.git
+cd RealtimeAgent-WebSocketAudioAdapter
+```
+
+### **2. Set Up Environment Variables**
+Create a `.env` file based on the provided `.env.example`:
+```bash
+cp .env.example .env
+```
+Add your OpenAI API key to the `.env` file as the value of `OPENAI_API_KEY`.
+
+### **3. Install Dependencies**
+Install the required Python packages using `pip`:
+```bash
+pip install .
+```
+
+### **4. Start the Server**
+Run the application with Uvicorn:
+```bash
+uvicorn realtime_over_websockets.main:app --port 5050
+```
+
+## **Test the App**
+With the server running, open http://localhost:5050/start-chat/ in your browser and allow microphone access. Speak into your microphone, and the AI assistant will respond in real time.
+
+## **License**
+This project is licensed under the [MIT License](LICENSE).

From 9fa18b8a7f3dbb623d28c29811395e7f7018de30 Mon Sep 17 00:00:00 2001
From: Tvrtko Sternak <sternakt@gmail.com>
Date: Fri, 3 Jan 2025 11:31:08 +0100
Subject: [PATCH 3/6] Add pre-commit workflow

---
 .github/workflows/pre-commit.yaml | 32 +++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .github/workflows/pre-commit.yaml

diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
new file mode 100644
index 0000000..024c032
--- /dev/null
+++ b/.github/workflows/pre-commit.yaml
@@ -0,0 +1,32 @@
+name: Pre-commit Checks
+
+on:
+  push:
+    branches:
+      - main
+      - '**' # Runs on all branches
+  pull_request:
+
+jobs:
+  pre-commit:
+    name: Run Pre-commit Hooks
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install ".[dev]"
+
+      - name: Run pre-commit
+        run: |
+          pre-commit install
+          pre-commit run --all-files

From 31aba2ac5af3acbf377464657a16f48257b56896 Mon Sep 17 00:00:00 2001
From: Tvrtko Sternak <sternakt@gmail.com>
Date: Fri, 3 Jan 2025 11:35:26 +0100
Subject: [PATCH 4/6] Update pre-commit

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cb627a0..968ac40 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@
 default_stages: [pre-commit, pre-merge-commit]
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
     -   id: trailing-whitespace
         stages: [pre-commit, pre-merge-commit, manual]

From f8dae11b2b0fbd8f0afb22281670ae607835159b Mon Sep 17 00:00:00 2001
From: Tvrtko Sternak <sternakt@gmail.com>
Date: Fri, 3 Jan 2025 11:43:33 +0100
Subject: [PATCH 5/6] Update pre-commit

---
 .github/workflows/pre-commit.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
index 024c032..f247ff1 100644
--- a/.github/workflows/pre-commit.yaml
+++ b/.github/workflows/pre-commit.yaml
@@ -5,7 +5,6 @@ on:
     branches:
       - main
       - '**' # Runs on all branches
-  pull_request:
 
 jobs:
   pre-commit:

From 9339f5ed7a7b0f198100d619eed6767e117174b3 Mon Sep 17 00:00:00 2001
From: Tvrtko Sternak <sternakt@gmail.com>
Date: Fri, 3 Jan 2025 12:34:15 +0100
Subject: [PATCH 6/6] Add pytest to project

---
 .github/workflows/test.yaml | 30 ++++++++++++++++++++++++++++++
 pyproject.toml              |  5 ++++-
 tests/test_app.py           | 11 +++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/test.yaml
 create mode 100644 tests/test_app.py

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 0000000..d6d30db
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,30 @@
+name: Run Pytest
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: 3.9
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -e ".[dev]"
+
+    - name: Run pytest
+      run: pytest tests/
diff --git a/pyproject.toml b/pyproject.toml
index e0bed15..5441a10 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,10 @@ lint = [
     "codespell==2.3.0",
 ]
 
-testing = []
+testing = [
+    "pytest==8.3.4",
+    "pytest-asyncio==0.25.0",
+]
 
 dev = [
     "realtime-over-websockets[type,lint,testing]",
diff --git a/tests/test_app.py b/tests/test_app.py
new file mode 100644
index 0000000..069739d
--- /dev/null
+++ b/tests/test_app.py
@@ -0,0 +1,11 @@
+from fastapi.testclient import TestClient
+
+from realtime_over_websockets.main import app
+
+client = TestClient(app)
+
+
+def test_index_page() -> None:
+    reply = client.get("/")
+    assert reply.status_code == 200  # the root route must be reachable
+    assert reply.json() == {"message": "WebSocket Audio Stream Server is running!"}