Skip to content

Commit

Permalink
add gpt optimization to live transcription
Browse files Browse the repository at this point in the history
  • Loading branch information
aldrinjenson committed Mar 8, 2024
1 parent db02c86 commit 5a60cca
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 7 deletions.
4 changes: 2 additions & 2 deletions ui/src/components/generate/UploadFile.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ const UploadFile = ({
case "language_detection":
const language_identified = jsonData["data"];
toast.info("Language identified as " + language_identified, {
delay: 8000,
autoClose: 8000,
});
return true;
case "info":
toast.info(jsonData.data);
toast.info(jsonData.data, { autoClose: 8000 });
return true;
case "error":
toast.error(jsonData.data);
Expand Down
45 changes: 45 additions & 0 deletions ui/src/pages/api/optimize.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import axios from "axios";

/**
 * API route: POST { transcription } -> { corrected_transcription }.
 *
 * Proxies the raw transcription to the OpenAI chat-completions endpoint and
 * returns a grammar-corrected version of the text.
 * Requires OPENAI_API_KEY in the environment.
 *
 * Responses:
 *   200 { corrected_transcription }  on success
 *   400 { error }                    when no transcription string was sent
 *   405 { error }                    for non-POST methods
 *   502 { error }                    when OpenAI returned no choices
 *   500 { error }                    on any other upstream/network failure
 */
export default async function handler(req, res) {
  if (req.method !== "POST") {
    res.status(405).json({ error: "Method Not Allowed" });
    return;
  }

  const { transcription } = req.body;
  // Guard: an empty/missing transcription would waste an API call and
  // produce a meaningless "correction".
  if (typeof transcription !== "string" || transcription.length === 0) {
    res.status(400).json({ error: "Missing transcription" });
    return;
  }

  try {
    const response = await axios.post(
      "https://api.openai.com/v1/chat/completions",
      {
        model: "gpt-3.5-turbo",
        messages: [
          {
            role: "system",
            content:
              "You will be provided with an audio transcription, and your task is to correct any mistakes like grammar and make improvements to the audio. The transcription will be provided in side 3 backticks in the format: Transcription : ```transcription``` ",
          },
          {
            role: "user",
            content: "Transcription : ```" + transcription + "```",
          },
        ],
        // Low temperature: we want faithful correction, not creative rewriting.
        temperature: 0.3,
        top_p: 1,
      },
      {
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
        },
      }
    );

    // `choices` can be empty on some API responses; avoid an uncaught
    // TypeError that would surface as an unhandled 500 with no log context.
    const corrected = response.data?.choices?.[0]?.message?.content;
    if (corrected == null) {
      res.status(502).json({ error: "Error optimizing transcription" });
      return;
    }

    res.status(200).json({ corrected_transcription: corrected });
  } catch (error) {
    console.error("Error optimizing transcription:", error);
    res.status(500).json({ error: "Error optimizing transcription" });
  }
}
3 changes: 2 additions & 1 deletion ui/src/pages/api/transcribe.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ import axios from "axios";
export default async function handler(req, res) {
if (req.method === "POST") {
try {
const { audioData } = req.body;
const { audioData, language } = req.body;
const audioBuffer = Buffer.from(audioData, "base64");
const audioBlob = new Blob([audioBuffer], { type: "audio/wav" });

const formData = new FormData();
formData.append("file", audioBlob, "audio.wav");
formData.append("model", "whisper-1");
formData.append("response_format", "text");
// formData.append("language", language);

const response = await axios.post(
"https://api.openai.com/v1/audio/transcriptions",
Expand Down
59 changes: 55 additions & 4 deletions ui/src/pages/livetranscribe.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import axios from "axios";
import React, { useState, useRef, useEffect } from "react";
import { toast } from "react-toastify";

const LiveTranscribe = () => {
const [isRecording, setIsRecording] = useState(false);
const [transcription, setTranscription] = useState("");
const [gptOptimizedTranscription, setGptOptimizedTranscription] =
useState("");
const mediaRecorderRef = useRef(null);
const streamRef = useRef(null);
const [enableTranscription, setEnableTranscription] = useState(false);
Expand Down Expand Up @@ -73,6 +77,7 @@ const LiveTranscribe = () => {
headers: {
"Content-Type": "application/json",
},
// body: JSON.stringify({ audioData: base64Data, language: "en" }),
body: JSON.stringify({ audioData: base64Data }),
});

Expand All @@ -95,15 +100,36 @@ const LiveTranscribe = () => {
setEnableTranscription((currState) => !currState);
};

// Sends the current transcription to /api/optimize and stores the
// LLM-corrected text in component state. A single toast is shown while the
// request is in flight and updated in place when it settles.
const handleOptimizeWithGpt = async () => {
  const toastId = toast.info("Optimizing with LLM...");
  try {
    const response = await axios.post("/api/optimize", {
      transcription,
    });
    console.log(
      "Optimized transcription:",
      response.data.corrected_transcription
    );
    setGptOptimizedTranscription(response.data.corrected_transcription);
    toast.update(toastId, {
      render: "Optimized with LLM",
      type: "success",
      autoClose: 5000,
    });
  } catch (error) {
    console.error("Error optimizing transcription:", error);
    // Bug fix: previously the "Optimizing with LLM..." toast was never
    // dismissed on failure, so it lingered forever and the user got no
    // feedback that the request failed.
    toast.update(toastId, {
      render: "Failed to optimize transcription",
      type: "error",
      autoClose: 5000,
    });
  }
};

return (
<div className="prose mx-auto">
<div className="prose mx-auto pb-5 mb-5">
<h1>Live Transcription</h1>
<div className="h-80" ref={transcriptionContainerRef}>
{/* {!enableTranscription ? ( */}
<textarea
value={transcription}
rows={10}
placeholder="Enable Transcription and start speaking..."
placeholder="Enable Transcription and start speaking.Don't mind about grammar or accuracy. Let your thoughts flow freely..."
onChange={(e) => setTranscription(e.target.value)}
className="w-full border rounded p-4"
/>
Expand All @@ -122,13 +148,38 @@ const LiveTranscribe = () => {

<p>{isRecording ? "Recording in progress...." : "Ready"} </p>

<btn className="btn btn-accent mr-5" onClick={handleEnableTranscription}>
<btn className="btn btn-accent mr-2" onClick={handleEnableTranscription}>
{enableTranscription ? "Disable" : "Enable"} Transcription
</btn>

<button className="btn btn-outline" onClick={() => setTranscription("")}>
<button
className="btn btn-neutral mx-2"
disabled={!transcription?.length}
onClick={handleOptimizeWithGpt}
>
Optimize Transcription
</button>

<button
className="btn btn-outline mx-2"
disabled={!transcription?.length}
onClick={() => setTranscription("")}
>
Clear Transcript
</button>

{gptOptimizedTranscription && (
<div className="mt-6 p-2 px-4 mb-4 border border-gray-200 rounded-lg">
<h2 className="text-xl font-bold mb-1">Optimized Transcription</h2>
<h5 className="text-lg font-semibold">
Transcription after processed by an LLM
</h5>
<p className="text-sm text-gray-600 mb-1">
Note: This works well mainly for English at the moment
</p>
<p className="text-base mt-6">{gptOptimizedTranscription}</p>
</div>
)}
</div>
);
};
Expand Down

0 comments on commit 5a60cca

Please sign in to comment.