Commit: iterate
yoziru committed Mar 27, 2024
1 parent f1ff799 commit ca709c2
Showing 32 changed files with 544 additions and 1,217 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -43,7 +43,7 @@ The easiest way to get started is to use the pre-built Docker image.
docker run --rm -d -p 3000:3000 -e VLLM_URL=http://host.docker.internal:8000 ghcr.io/yoziru/nextjs-vllm-ui:latest
```

Then go to [localhost:3000](http://localhost:3000/vllm) and start chatting with your favourite model!
Then go to [localhost:3000](http://localhost:3000) and start chatting with your favourite model!

# Development 📖

@@ -67,7 +67,7 @@ cd nextjs-ollama-llm-ui
mv .example.env .env
```

**4. If your instance of Ollama is NOT running on the default IP address and port, change the variable in the .env file to fit your use case:**
**4. If your instance of vLLM is NOT running on the default IP address and port, change the variable in the .env file to fit your use case:**

```
VLLM_URL="http://localhost:8000"
5 changes: 0 additions & 5 deletions package.json
@@ -10,22 +10,17 @@
},
"dependencies": {
"@hookform/resolvers": "^3.3.4",
"@langchain/community": "^0.0.43",
"@langchain/core": "^0.1.51",
"@radix-ui/react-avatar": "^1.0.4",
"@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-dropdown-menu": "^2.0.6",
"@radix-ui/react-icons": "^1.3.0",
"@radix-ui/react-label": "^2.0.2",
"@radix-ui/react-popover": "^1.0.7",
"@radix-ui/react-scroll-area": "^1.0.5",
"@radix-ui/react-select": "^2.0.0",
"@radix-ui/react-slot": "^1.0.2",
"@radix-ui/react-tooltip": "^1.0.7",
"ai": "^3.0.14",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"langchain": "^0.1.30",
"lucide-react": "^0.363.0",
"next": "14.1.4",
"next-themes": "^0.3.0",
177 changes: 142 additions & 35 deletions src/app/api/chat/route.ts
@@ -1,13 +1,15 @@
import { StreamingTextResponse, Message } from "ai";
import {
AIMessage,
HumanMessage,
SystemMessage,
} from "@langchain/core/messages";
import { BytesOutputParser } from "@langchain/core/output_parsers";
import { ChatOpenAI } from "@langchain/openai";
createParser,
ParsedEvent,
ReconnectInterval,
} from "eventsource-parser";
import { NextRequest, NextResponse } from "next/server";

export interface Message {
role: "user" | "assistant" | "system";
content: string;
}

const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
// early exit if system prompt is empty
if (!systemPrompt || systemPrompt === "") {
@@ -20,15 +22,13 @@ const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
// if there are no messages, add the system prompt as the first message
messages = [
{
id: "1",
content: systemPrompt,
role: "system",
},
];
} else if (messages.length === 0) {
// if there are no messages, add the system prompt as the first message
messages.push({
id: "1",
content: systemPrompt,
role: "system",
});
@@ -40,7 +40,6 @@ const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
} else {
// if the first message is not a system prompt, add the system prompt as the first message
messages.unshift({
id: "1",
content: systemPrompt,
role: "system",
});
@@ -49,42 +48,150 @@ const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
return messages;
};

const formatMessages = (messages: Message[]) => {
const formatMessages = (messages: Message[]): Message[] => {
return messages.map((m) => {
if (m.role === "system") {
return new SystemMessage(m.content);
return { role: "system", content: m.content } as Message;
} else if (m.role === "user") {
return new HumanMessage(m.content);
return { role: "user", content: m.content } as Message;
} else {
return new AIMessage(m.content);
return { role: "assistant", content: m.content } as Message;
}
});
};

// export async function POST(req: NextRequest) {
// const { messages, chatOptions } = await req.json();
// if (!chatOptions.selectedModel || chatOptions.selectedModel === "") {
// throw new Error("Selected model is required");
// }

// const baseUrl = process.env.VLLM_URL + "/v1";
// const model = new ChatOpenAI({
// openAIApiKey: "foo",
// configuration: {
// baseURL: baseUrl,
// },
// modelName: chatOptions.selectedModel,
// temperature: chatOptions.temperature,
// });

// const parser = new BytesOutputParser();
// const formattedMessages = formatMessages(
// addSystemMessage(messages, chatOptions.systemPrompt)
// );
// try {
// const stream = await model.pipe(parser).stream(formattedMessages);
// return new StreamingTextResponse(stream);
// } catch (e: any) {
// return NextResponse.json({ error: e.message }, { status: e.status ?? 500 });
// }
// }

export async function POST(req: NextRequest) {
const { messages, chatOptions } = await req.json();
if (!chatOptions.selectedModel || chatOptions.selectedModel === "") {
throw new Error("Selected model is required");
try {
const { messages, chatOptions } = await req.json();
if (!chatOptions.selectedModel || chatOptions.selectedModel === "") {
throw new Error("Selected model is required");
}

const baseUrl = process.env.VLLM_URL;
if (!baseUrl) {
throw new Error("VLLM_URL is not set");
}
const formattedMessages = formatMessages(
addSystemMessage(messages, chatOptions.systemPrompt)
);

const stream = await getOpenAIStream(
baseUrl,
chatOptions.selectedModel,
formattedMessages,
chatOptions.temperature
);
return new NextResponse(stream, {
headers: { "Content-Type": "text/event-stream" },
});
} catch (error) {
console.error(error);
return NextResponse.json(
{
success: false,
error: error instanceof Error ? error.message : "Unknown error",
},
{ status: 500 }
);
}
}

const baseUrl = process.env.VLLM_URL + "/v1";
const model = new ChatOpenAI({
openAIApiKey: "foo",
configuration: {
baseURL: baseUrl,
},
modelName: chatOptions.selectedModel,
temperature: chatOptions.temperature,
const getOpenAIStream = async (
apiUrl: string,
model: string,
messages: Message[],
temperature?: number,
apiKey?: string
) => {
const encoder = new TextEncoder();
const decoder = new TextDecoder();
const headers = new Headers();
headers.set("Content-Type", "application/json");
if (apiKey !== undefined) {
headers.set("Authorization", `Bearer ${apiKey}`);
headers.set("api-key", apiKey);
}
const res = await fetch(apiUrl + "/v1/chat/completions",{
headers: headers,
method: "POST",
body: JSON.stringify({
model: model,
// frequency_penalty: 0,
// max_tokens: 2000,
messages: messages,
// presence_penalty: 0,
stream: true,
temperature: temperature ?? 0.5,
// top_p: 0.95,
}),
});

const parser = new BytesOutputParser();
const formattedMessages = formatMessages(
addSystemMessage(messages, chatOptions.systemPrompt)
);
try {
const stream = await model.pipe(parser).stream(formattedMessages);
return new StreamingTextResponse(stream);
} catch (e: any) {
return NextResponse.json({ error: e.message }, { status: e.status ?? 500 });
if (res.status !== 200) {
const statusText = res.statusText;
const responseBody = await res.text();
console.error(`vLLM API response error: ${responseBody}`);
throw new Error(
`The vLLM API has encountered an error with a status code of ${res.status} ${statusText}: ${responseBody}`
);
}
}

return new ReadableStream({
async start(controller) {
const onParse = (event: ParsedEvent | ReconnectInterval) => {
if (event.type === "event") {
const data = event.data;

if (data === "[DONE]") {
controller.close();
return;
}

try {
const json = JSON.parse(data);
const text = json.choices[0].delta.content;
const queue = encoder.encode(text);
controller.enqueue(queue);
} catch (e) {
controller.error(e);
}
}
};

const parser = createParser(onParse);

for await (const chunk of res.body as any) {
// An extra newline is required to make AzureOpenAI work.
const str = decoder.decode(chunk).replace("[DONE]\n", "[DONE]\n\n");
parser.feed(str);
}
},
});
};
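For reference, a minimal client-side sketch of how this streaming route could be consumed. The handler above enqueues plain-text deltas and closes the stream on `[DONE]`, so reading the response body with a fetch reader is enough. The `streamChat` name, the `onDelta` callback, and the error handling are illustrative assumptions, not part of this commit; the request shape mirrors what the handler expects.

```ts
// Sketch of a client for the /api/chat route above (assumed helper, not in this commit).
// The Message type mirrors the one exported by the route; the route streams raw text deltas.
type Message = { role: "user" | "assistant" | "system"; content: string };

async function streamChat(
  messages: Message[],
  chatOptions: { selectedModel: string; systemPrompt?: string; temperature?: number },
  onDelta: (text: string) => void
): Promise<void> {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages, chatOptions }),
  });
  if (!res.ok || !res.body) {
    throw new Error(`chat request failed: ${res.status} ${res.statusText}`);
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  // Read until the server closes the stream (controller.close() on "[DONE]").
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    onDelta(decoder.decode(value, { stream: true }));
  }
}
```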
34 changes: 28 additions & 6 deletions src/app/api/models/route.ts
@@ -1,7 +1,29 @@
export async function GET(req: Request) {
const res = await fetch(process.env.VLLM_URL + "/v1/models");
return new Response(res.body, res);
}
import { NextRequest, NextResponse } from "next/server";

// forces the route handler to be dynamic
export const dynamic = "force-dynamic";
export async function GET(req: NextRequest) {
try {
const res = await fetch(process.env.VLLM_URL + "/v1/models");
if (res.status !== 200) {
const statusText = res.statusText;
const responseBody = await res.text();
console.error(`vLLM /api/models response error: ${responseBody}`);
return NextResponse.json(
{
success: false,
error: statusText,
},
{ status: res.status }
);
}
return new Response(res.body, res);
} catch (error) {
console.error(error);
return NextResponse.json(
{
success: false,
error: error instanceof Error ? error.message : "Unknown error",
},
{ status: 500 }
);
}
}
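A hedged sketch of how the UI side might consume this proxy. vLLM's `/v1/models` endpoint follows the OpenAI-style list format (`{ object: "list", data: [{ id, ... }] }`), which the handler passes through unchanged; fields beyond `id`, and the `fetchModelIds` helper itself, are assumptions for illustration.

```ts
// Sketch of fetching the proxied model list from /api/models (assumed helper, not in this commit).
// Assumes the upstream vLLM server returns the OpenAI-style list payload.
interface ModelListResponse {
  object: string;
  data: { id: string }[];
}

async function fetchModelIds(): Promise<string[]> {
  const res = await fetch("/api/models", { cache: "no-store" });
  if (!res.ok) {
    throw new Error(`failed to load models: ${res.status} ${res.statusText}`);
  }
  const body = (await res.json()) as ModelListResponse;
  return body.data.map((m) => m.id);
}
```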
4 changes: 2 additions & 2 deletions src/app/layout.tsx
@@ -8,8 +8,8 @@ export const runtime = "edge"; // 'nodejs' (default) | 'edge'
const inter = Inter({ subsets: ["latin"] });

export const metadata: Metadata = {
title: "Ollama UI",
description: "Ollama chatbot web interface",
title: "vLLM UI",
description: "vLLM chatbot web interface",
};

export const viewport = {
