Commit: iterate
yoziru committed Mar 27, 2024
1 parent f1ff799 commit ca709c2
Showing 32 changed files with 544 additions and 1,217 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -43,7 +43,7 @@ The easiest way to get started is to use the pre-built Docker image.
docker run --rm -d -p 3000:3000 -e VLLM_URL=http://host.docker.internal:8000 ghcr.io/yoziru/nextjs-vllm-ui:latest
```

Then go to [localhost:3000](http://localhost:3000/vllm) and start chatting with your favourite model!
Then go to [localhost:3000](http://localhost:3000) and start chatting with your favourite model!

# Development 📖

@@ -67,7 +67,7 @@ cd nextjs-ollama-llm-ui
mv .example.env .env
```

**4. If your instance of Ollama is NOT running on the default IP address and port, change the variable in the .env file to fit your use case:**
**4. If your instance of vLLM is NOT running on the default IP address and port, change the variable in the .env file to fit your use case:**

```
VLLM_URL="http://localhost:8000"
5 changes: 0 additions & 5 deletions package.json
@@ -10,22 +10,17 @@
},
"dependencies": {
"@hookform/resolvers": "^3.3.4",
"@langchain/community": "^0.0.43",
"@langchain/core": "^0.1.51",
"@radix-ui/react-avatar": "^1.0.4",
"@radix-ui/react-dialog": "^1.0.5",
"@radix-ui/react-dropdown-menu": "^2.0.6",
"@radix-ui/react-icons": "^1.3.0",
"@radix-ui/react-label": "^2.0.2",
"@radix-ui/react-popover": "^1.0.7",
"@radix-ui/react-scroll-area": "^1.0.5",
"@radix-ui/react-select": "^2.0.0",
"@radix-ui/react-slot": "^1.0.2",
"@radix-ui/react-tooltip": "^1.0.7",
"ai": "^3.0.14",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"langchain": "^0.1.30",
"lucide-react": "^0.363.0",
"next": "14.1.4",
"next-themes": "^0.3.0",
177 changes: 142 additions & 35 deletions src/app/api/chat/route.ts
@@ -1,13 +1,15 @@
import { StreamingTextResponse, Message } from "ai";
import {
AIMessage,
HumanMessage,
SystemMessage,
} from "@langchain/core/messages";
import { BytesOutputParser } from "@langchain/core/output_parsers";
import { ChatOpenAI } from "@langchain/openai";
createParser,
ParsedEvent,
ReconnectInterval,
} from "eventsource-parser";
import { NextRequest, NextResponse } from "next/server";

export interface Message {
role: "user" | "assistant" | "system";
content: string;
}

const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
// early exit if system prompt is empty
if (!systemPrompt || systemPrompt === "") {
@@ -20,15 +22,13 @@ const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
// if there are no messages, add the system prompt as the first message
messages = [
{
id: "1",
content: systemPrompt,
role: "system",
},
];
} else if (messages.length === 0) {
// if there are no messages, add the system prompt as the first message
messages.push({
id: "1",
content: systemPrompt,
role: "system",
});
@@ -40,7 +40,6 @@ const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
} else {
// if the first message is not a system prompt, add the system prompt as the first message
messages.unshift({
id: "1",
content: systemPrompt,
role: "system",
});
@@ -49,42 +48,150 @@ const addSystemMessage = (messages: Message[], systemPrompt?: string) => {
return messages;
};

const formatMessages = (messages: Message[]) => {
const formatMessages = (messages: Message[]): Message[] => {
return messages.map((m) => {
if (m.role === "system") {
return new SystemMessage(m.content);
return { role: "system", content: m.content } as Message;
} else if (m.role === "user") {
return new HumanMessage(m.content);
return { role: "user", content: m.content } as Message;
} else {
return new AIMessage(m.content);
return { role: "assistant", content: m.content } as Message;
}
});
};

// export async function POST(req: NextRequest) {
// const { messages, chatOptions } = await req.json();
// if (!chatOptions.selectedModel || chatOptions.selectedModel === "") {
// throw new Error("Selected model is required");
// }

// const baseUrl = process.env.VLLM_URL + "/v1";
// const model = new ChatOpenAI({
// openAIApiKey: "foo",
// configuration: {
// baseURL: baseUrl,
// },
// modelName: chatOptions.selectedModel,
// temperature: chatOptions.temperature,
// });

// const parser = new BytesOutputParser();
// const formattedMessages = formatMessages(
// addSystemMessage(messages, chatOptions.systemPrompt)
// );
// try {
// const stream = await model.pipe(parser).stream(formattedMessages);
// return new StreamingTextResponse(stream);
// } catch (e: any) {
// return NextResponse.json({ error: e.message }, { status: e.status ?? 500 });
// }
// }

export async function POST(req: NextRequest) {
const { messages, chatOptions } = await req.json();
if (!chatOptions.selectedModel || chatOptions.selectedModel === "") {
throw new Error("Selected model is required");
try {
const { messages, chatOptions } = await req.json();
if (!chatOptions.selectedModel || chatOptions.selectedModel === "") {
throw new Error("Selected model is required");
}

const baseUrl = process.env.VLLM_URL;
if (!baseUrl) {
throw new Error("VLLM_URL is not set");
}
const formattedMessages = formatMessages(
addSystemMessage(messages, chatOptions.systemPrompt)
);

const stream = await getOpenAIStream(
baseUrl,
chatOptions.selectedModel,
formattedMessages,
chatOptions.temperature
);
return new NextResponse(stream, {
headers: { "Content-Type": "text/event-stream" },
});
} catch (error) {
console.error(error);
return NextResponse.json(
{
success: false,
error: error instanceof Error ? error.message : "Unknown error",
},
{ status: 500 }
);
}
}

const baseUrl = process.env.VLLM_URL + "/v1";
const model = new ChatOpenAI({
openAIApiKey: "foo",
configuration: {
baseURL: baseUrl,
},
modelName: chatOptions.selectedModel,
temperature: chatOptions.temperature,
const getOpenAIStream = async (
apiUrl: string,
model: string,
messages: Message[],
temperature?: number,
apiKey?: string
) => {
const encoder = new TextEncoder();
const decoder = new TextDecoder();
const headers = new Headers();
headers.set("Content-Type", "application/json");
if (apiKey !== undefined) {
headers.set("Authorization", `Bearer ${apiKey}`);
headers.set("api-key", apiKey);
}
const res = await fetch(apiUrl + "/v1/chat/completions",{
headers: headers,
method: "POST",
body: JSON.stringify({
model: model,
// frequency_penalty: 0,
// max_tokens: 2000,
messages: messages,
// presence_penalty: 0,
stream: true,
temperature: temperature ?? 0.5,
// top_p: 0.95,
}),
});

const parser = new BytesOutputParser();
const formattedMessages = formatMessages(
addSystemMessage(messages, chatOptions.systemPrompt)
);
try {
const stream = await model.pipe(parser).stream(formattedMessages);
return new StreamingTextResponse(stream);
} catch (e: any) {
return NextResponse.json({ error: e.message }, { status: e.status ?? 500 });
if (res.status !== 200) {
const statusText = res.statusText;
const responseBody = await res.text();
console.error(`vLLM API response error: ${responseBody}`);
throw new Error(
`The vLLM API has encountered an error with a status code of ${res.status} ${statusText}: ${responseBody}`
);
}
}

return new ReadableStream({
async start(controller) {
const onParse = (event: ParsedEvent | ReconnectInterval) => {
if (event.type === "event") {
const data = event.data;

if (data === "[DONE]") {
controller.close();
return;
}

try {
const json = JSON.parse(data);
const text = json.choices[0].delta.content;
const queue = encoder.encode(text);
controller.enqueue(queue);
} catch (e) {
controller.error(e);
}
}
};

const parser = createParser(onParse);

for await (const chunk of res.body as any) {
// An extra newline is required to make AzureOpenAI work.
const str = decoder.decode(chunk).replace("[DONE]\n", "[DONE]\n\n");
parser.feed(str);
}
},
});
};
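For reference, a minimal client-side sketch of how this streaming route could be consumed. The handler above enqueues plain-text deltas and closes the stream on `[DONE]`, so reading the response body with a fetch reader is enough. The `streamChat` name, the `onDelta` callback, and the error handling are illustrative assumptions, not part of this commit; the request shape mirrors what the handler expects.

```ts
// Sketch of a client for the /api/chat route above (assumed helper, not in this commit).
// The Message type mirrors the one exported by the route; the route streams raw text deltas.
type Message = { role: "user" | "assistant" | "system"; content: string };

async function streamChat(
  messages: Message[],
  chatOptions: { selectedModel: string; systemPrompt?: string; temperature?: number },
  onDelta: (text: string) => void
): Promise<void> {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages, chatOptions }),
  });
  if (!res.ok || !res.body) {
    throw new Error(`chat request failed: ${res.status} ${res.statusText}`);
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  // Read until the server closes the stream (controller.close() on "[DONE]").
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    onDelta(decoder.decode(value, { stream: true }));
  }
}
```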
34 changes: 28 additions & 6 deletions src/app/api/models/route.ts
@@ -1,7 +1,29 @@
export async function GET(req: Request) {
const res = await fetch(process.env.VLLM_URL + "/v1/models");
return new Response(res.body, res);
}
import { NextRequest, NextResponse } from "next/server";

// forces the route handler to be dynamic
export const dynamic = "force-dynamic";
export async function GET(req: NextRequest) {
try {
const res = await fetch(process.env.VLLM_URL + "/v1/models");
if (res.status !== 200) {
const statusText = res.statusText;
const responseBody = await res.text();
console.error(`vLLM /api/models response error: ${responseBody}`);
return NextResponse.json(
{
success: false,
error: statusText,
},
{ status: res.status }
);
}
return new Response(res.body, res);
} catch (error) {
console.error(error);
return NextResponse.json(
{
success: false,
error: error instanceof Error ? error.message : "Unknown error",
},
{ status: 500 }
);
}
}
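A hedged sketch of how the UI side might consume this proxy. vLLM's `/v1/models` endpoint follows the OpenAI-style list format (`{ object: "list", data: [{ id, ... }] }`), which the handler passes through unchanged; fields beyond `id`, and the `fetchModelIds` helper itself, are assumptions for illustration.

```ts
// Sketch of fetching the proxied model list from /api/models (assumed helper, not in this commit).
// Assumes the upstream vLLM server returns the OpenAI-style list payload.
interface ModelListResponse {
  object: string;
  data: { id: string }[];
}

async function fetchModelIds(): Promise<string[]> {
  const res = await fetch("/api/models", { cache: "no-store" });
  if (!res.ok) {
    throw new Error(`failed to load models: ${res.status} ${res.statusText}`);
  }
  const body = (await res.json()) as ModelListResponse;
  return body.data.map((m) => m.id);
}
```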
4 changes: 2 additions & 2 deletions src/app/layout.tsx
@@ -8,8 +8,8 @@ export const runtime = "edge"; // 'nodejs' (default) | 'edge'
const inter = Inter({ subsets: ["latin"] });

export const metadata: Metadata = {
title: "Ollama UI",
description: "Ollama chatbot web interface",
title: "vLLM UI",
description: "vLLM chatbot web interface",
};

export const viewport = {
