Skip to content

Commit

Permalink
add gpt optimization to live transcription
Browse files Browse the repository at this point in the history
  • Loading branch information
aldrinjenson committed Mar 8, 2024
1 parent db02c86 commit 5a60cca
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 7 deletions.
4 changes: 2 additions & 2 deletions ui/src/components/generate/UploadFile.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ const UploadFile = ({
case "language_detection":
const language_identified = jsonData["data"];
toast.info("Language identified as " + language_identified, {
delay: 8000,
autoClose: 8000,
});
return true;
case "info":
toast.info(jsonData.data);
toast.info(jsonData.data, { autoClose: 8000 });
return true;
case "error":
toast.error(jsonData.data);
Expand Down
45 changes: 45 additions & 0 deletions ui/src/pages/api/optimize.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import axios from "axios";

/**
 * API route: POST { transcription } -> { corrected_transcription }.
 *
 * Proxies the raw transcription to the OpenAI chat-completions endpoint and
 * returns a grammar-corrected version of the text.
 * Requires OPENAI_API_KEY in the environment.
 *
 * Responses:
 *   200 { corrected_transcription }  on success
 *   400 { error }                    when no transcription string was sent
 *   405 { error }                    for non-POST methods
 *   502 { error }                    when OpenAI returned no choices
 *   500 { error }                    on any other upstream/network failure
 */
export default async function handler(req, res) {
  if (req.method !== "POST") {
    res.status(405).json({ error: "Method Not Allowed" });
    return;
  }

  const { transcription } = req.body;
  // Guard: an empty/missing transcription would waste an API call and
  // produce a meaningless "correction".
  if (typeof transcription !== "string" || transcription.length === 0) {
    res.status(400).json({ error: "Missing transcription" });
    return;
  }

  try {
    const response = await axios.post(
      "https://api.openai.com/v1/chat/completions",
      {
        model: "gpt-3.5-turbo",
        messages: [
          {
            role: "system",
            content:
              "You will be provided with an audio transcription, and your task is to correct any mistakes like grammar and make improvements to the audio. The transcription will be provided in side 3 backticks in the format: Transcription : ```transcription``` ",
          },
          {
            role: "user",
            content: "Transcription : ```" + transcription + "```",
          },
        ],
        // Low temperature: we want faithful correction, not creative rewriting.
        temperature: 0.3,
        top_p: 1,
      },
      {
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
        },
      }
    );

    // `choices` can be empty on some API responses; avoid an uncaught
    // TypeError that would surface as an unhandled 500 with no log context.
    const corrected = response.data?.choices?.[0]?.message?.content;
    if (corrected == null) {
      res.status(502).json({ error: "Error optimizing transcription" });
      return;
    }

    res.status(200).json({ corrected_transcription: corrected });
  } catch (error) {
    console.error("Error optimizing transcription:", error);
    res.status(500).json({ error: "Error optimizing transcription" });
  }
}
3 changes: 2 additions & 1 deletion ui/src/pages/api/transcribe.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ import axios from "axios";
export default async function handler(req, res) {
if (req.method === "POST") {
try {
const { audioData } = req.body;
const { audioData, language } = req.body;
const audioBuffer = Buffer.from(audioData, "base64");
const audioBlob = new Blob([audioBuffer], { type: "audio/wav" });

const formData = new FormData();
formData.append("file", audioBlob, "audio.wav");
formData.append("model", "whisper-1");
formData.append("response_format", "text");
// formData.append("language", language);

const response = await axios.post(
"https://api.openai.com/v1/audio/transcriptions",
Expand Down
59 changes: 55 additions & 4 deletions ui/src/pages/livetranscribe.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import axios from "axios";
import React, { useState, useRef, useEffect } from "react";
import { toast } from "react-toastify";

const LiveTranscribe = () => {
const [isRecording, setIsRecording] = useState(false);
const [transcription, setTranscription] = useState("");
const [gptOptimizedTranscription, setGptOptimizedTranscription] =
useState("");
const mediaRecorderRef = useRef(null);
const streamRef = useRef(null);
const [enableTranscription, setEnableTranscription] = useState(false);
Expand Down Expand Up @@ -73,6 +77,7 @@ const LiveTranscribe = () => {
headers: {
"Content-Type": "application/json",
},
// body: JSON.stringify({ audioData: base64Data, language: "en" }),
body: JSON.stringify({ audioData: base64Data }),
});

Expand All @@ -95,15 +100,36 @@ const LiveTranscribe = () => {
setEnableTranscription((currState) => !currState);
};

// Sends the current transcription to /api/optimize and stores the
// LLM-corrected text in component state. A single toast is shown while the
// request is in flight and updated in place when it settles.
const handleOptimizeWithGpt = async () => {
  const toastId = toast.info("Optimizing with LLM...");
  try {
    const response = await axios.post("/api/optimize", {
      transcription,
    });
    console.log(
      "Optimized transcription:",
      response.data.corrected_transcription
    );
    setGptOptimizedTranscription(response.data.corrected_transcription);
    toast.update(toastId, {
      render: "Optimized with LLM",
      type: "success",
      autoClose: 5000,
    });
  } catch (error) {
    console.error("Error optimizing transcription:", error);
    // Bug fix: previously the "Optimizing with LLM..." toast was never
    // dismissed on failure, so it lingered forever and the user got no
    // feedback that the request failed.
    toast.update(toastId, {
      render: "Failed to optimize transcription",
      type: "error",
      autoClose: 5000,
    });
  }
};

return (
<div className="prose mx-auto">
<div className="prose mx-auto pb-5 mb-5">
<h1>Live Transcription</h1>
<div className="h-80" ref={transcriptionContainerRef}>
{/* {!enableTranscription ? ( */}
<textarea
value={transcription}
rows={10}
placeholder="Enable Transcription and start speaking..."
placeholder="Enable Transcription and start speaking.Don't mind about grammar or accuracy. Let your thoughts flow freely..."
onChange={(e) => setTranscription(e.target.value)}
className="w-full border rounded p-4"
/>
Expand All @@ -122,13 +148,38 @@ const LiveTranscribe = () => {

<p>{isRecording ? "Recording in progress...." : "Ready"} </p>

<btn className="btn btn-accent mr-5" onClick={handleEnableTranscription}>
<btn className="btn btn-accent mr-2" onClick={handleEnableTranscription}>
{enableTranscription ? "Disable" : "Enable"} Transcription
</btn>

<button className="btn btn-outline" onClick={() => setTranscription("")}>
<button
className="btn btn-neutral mx-2"
disabled={!transcription?.length}
onClick={handleOptimizeWithGpt}
>
Optimize Transcription
</button>

<button
className="btn btn-outline mx-2"
disabled={!transcription?.length}
onClick={() => setTranscription("")}
>
Clear Transcript
</button>

{gptOptimizedTranscription && (
<div className="mt-6 p-2 px-4 mb-4 border border-gray-200 rounded-lg">
<h2 className="text-xl font-bold mb-1">Optimized Transcription</h2>
<h5 className="text-lg font-semibold">
Transcription after processed by an LLM
</h5>
<p className="text-sm text-gray-600 mb-1">
Note: This works well mainly for English at the moment
</p>
<p className="text-base mt-6">{gptOptimizedTranscription}</p>
</div>
)}
</div>
);
};
Expand Down

0 comments on commit 5a60cca

Please sign in to comment.