Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[widgets] use chatCompletionStream #664

Merged
merged 1 commit into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion packages/tasks/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ export type { LibraryUiElement, ModelLibraryKey } from "./model-libraries";
export type { ModelData, TransformersInfo } from "./model-data";
export type { AddedToken, SpecialTokensMap, TokenizerConfig } from "./tokenizer-data";
export type {
ChatMessage,
WidgetExample,
WidgetExampleAttribute,
WidgetExampleAssetAndPromptInput,
Expand Down
9 changes: 3 additions & 6 deletions packages/tasks/src/widget-example.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
* See default-widget-inputs.ts for the default widget inputs; this file only contains the types
*/

import type { ChatCompletionInputMessage } from "./tasks";

type TableData = Record<string, (string | number)[]>;

//#region outputs
Expand Down Expand Up @@ -51,13 +53,8 @@ export interface WidgetExampleBase<TOutput> {
output?: TOutput;
}

export interface ChatMessage {
role: "user" | "assistant" | "system";
content: string;
}

export interface WidgetExampleChatInput<TOutput = WidgetExampleOutput> extends WidgetExampleBase<TOutput> {
messages: ChatMessage[];
messages: ChatCompletionInputMessage[];
}

export interface WidgetExampleTextInput<TOutput = WidgetExampleOutput> extends WidgetExampleBase<TOutput> {
Expand Down
1 change: 0 additions & 1 deletion packages/widgets/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
],
"dependencies": {
"@huggingface/inference": "workspace:^",
"@huggingface/jinja": "workspace:^",
"@huggingface/tasks": "workspace:^",
"marked": "^12.0.2"
},
Expand Down
3 changes: 0 additions & 3 deletions packages/widgets/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

import { isFullyScrolled, scrollToMax } from "../../../../utils/ViewUtils.js";
import WidgetOutputConvoBubble from "../WidgetOuputConvoBubble/WidgetOutputConvoBubble.svelte";
import type { ChatMessage, SpecialTokensMap } from "@huggingface/tasks";
import type { ChatCompletionInputMessage, SpecialTokensMap } from "@huggingface/tasks";
import { widgetStates } from "../../stores.js";

export let modelId: string;
export let messages: ChatMessage[];
export let messages: ChatCompletionInputMessage[];
export let specialTokensMap: SpecialTokensMap | undefined = undefined;

let wrapperEl: HTMLElement;
Expand All @@ -30,8 +30,10 @@
</div>
<div class="flex flex-col items-end space-y-4 p-3">
{#each messages as message}
{@const position = message.role === "user" ? "right" : message.role === "assistant" ? "left" : "center"}
<WidgetOutputConvoBubble {position} {specialTokensMap} text={message.content} />
{#if message.content}
{@const position = message.role === "user" ? "right" : message.role === "assistant" ? "left" : "center"}
<WidgetOutputConvoBubble {position} {specialTokensMap} text={message.content} />
{/if}
{/each}
</div>
</div>
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type {
ChatMessage,
ChatCompletionInputMessage,
WidgetExampleAssetAndPromptInput,
WidgetExampleAssetAndTextInput,
WidgetExampleAssetAndZeroShotInput,
Expand Down Expand Up @@ -104,7 +104,7 @@ export function isChatInput<TOutput>(sample: unknown): sample is WidgetExampleCh
"messages" in sample &&
Array.isArray(sample.messages) &&
sample.messages.every(
(message): message is ChatMessage =>
(message): message is ChatCompletionInputMessage =>
isObject(message) &&
"role" in message &&
"content" in message &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,20 @@
import { onMount, tick } from "svelte";
import type { WidgetProps, ExampleRunOpts, InferenceRunOpts } from "../../shared/types.js";
import type { Options } from "@huggingface/inference";
import { Template } from "@huggingface/jinja";
import type {
SpecialTokensMap,
TokenizerConfig,
WidgetExampleTextInput,
TextGenerationInput,
ChatCompletionInput,
WidgetExampleOutputText,
WidgetExampleChatInput,
WidgetExample,
AddedToken,
ChatCompletionInputMessage,
} from "@huggingface/tasks";
import { SPECIAL_TOKENS_ATTRIBUTES } from "@huggingface/tasks";
import { HfInference } from "@huggingface/inference";

import type { ChatMessage } from "@huggingface/tasks";
import WidgetOutputConvo from "../../shared/WidgetOutputConvo/WidgetOutputConvo.svelte";
import WidgetQuickInput from "../../shared/WidgetQuickInput/WidgetQuickInput.svelte";
import WidgetWrapper from "../../shared/WidgetWrapper/WidgetWrapper.svelte";
Expand All @@ -40,21 +39,20 @@

$: isDisabled = $widgetStates?.[model.id]?.isDisabled;

let messages: ChatMessage[] = [];
let messages: ChatCompletionInputMessage[] = [];
let error: string = "";
let isLoading: boolean = false;
let outputJson: string;
let text = "";

let compiledTemplate: Template;
let tokenizerConfig: TokenizerConfig;
let specialTokensMap: SpecialTokensMap | undefined = undefined;
let inferenceClient: HfInference | undefined = undefined;
let abort: AbortController | undefined = undefined;

$: inferenceClient = new HfInference(apiToken);

// Check config and compile template
// check config
onMount(() => {
const config = model.config;
if (config === undefined) {
Expand All @@ -81,12 +79,6 @@
error = "No chat template found in tokenizer config";
return;
}
try {
compiledTemplate = new Template(chatTemplate);
} catch (e) {
error = `Invalid chat template: "${(e as Error).message}"`;
return;
}
});

async function handleNewMessage(): Promise<void> {
Expand Down Expand Up @@ -125,33 +117,18 @@
await tick();
return;
}
if (!compiledTemplate) {
return;
}
if (!inferenceClient) {
error = "Inference client not ready";
return;
}
// Render chat template

specialTokensMap = extractSpecialTokensMap(tokenizerConfig);

let chatText;
try {
chatText = compiledTemplate.render({
messages,
add_generation_prompt: true,
...specialTokensMap,
});
} catch (e) {
error = `An error occurred while rendering the chat template: "${(e as Error).message}"`;
return;
}
const previousMessages = [...messages];

const input: TextGenerationInput & Required<Pick<TextGenerationInput, "parameters">> = {
inputs: chatText,
parameters: {
return_full_text: false,
},
Comment on lines -138 to -154
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that's recent code from @Wauplin and @SBrandeis, I would like a review from them on this.

const input: ChatCompletionInput = {
model: model.id,
messages: previousMessages,
};
addInferenceParameters(input, model);

Expand All @@ -171,32 +148,44 @@

tgiSupportedModels = await getTgiSupportedModels(apiUrl);
if ($tgiSupportedModels?.has(model.id)) {
console.debug("Starting text generation using the TGI streaming API");
console.debug("Starting chat completion using the TGI streaming API");
let newMessage = {
role: "assistant",
content: "",
} satisfies ChatMessage;
const previousMessages = [...messages];
const tokenStream = inferenceClient.textGenerationStream(
} satisfies ChatCompletionInputMessage;

const tokenStream = inferenceClient.chatCompletionStream(
{
...input,
model: model.id,
accessToken: apiToken,
...input,
},
opts
);

for await (const newToken of tokenStream) {
if (newToken.token.special) continue;
newMessage.content = newMessage.content + newToken.token.text;
const newTokenContent = newToken.choices?.[0].delta.content;
if (!newTokenContent) {
continue;
}
newMessage.content = newMessage.content + newTokenContent;
messages = [...previousMessages, newMessage];
await tick();
}
} else {
console.debug("Starting text generation using the synchronous API");
input.parameters.max_new_tokens = 100;
const output = await inferenceClient.textGeneration({ ...input, model: model.id, accessToken: apiToken }, opts);
messages = [...messages, { role: "assistant", content: output.generated_text }];
await tick();
console.debug("Starting chat completion using the synchronous API");
input.max_new_tokens = 100;
const output = await inferenceClient.chatCompletion(
{
accessToken: apiToken,
...input,
},
opts
);
const newAssistantMsg = output.choices.at(-1)?.message;
if (newAssistantMsg) {
messages = [...messages, newAssistantMsg];
await tick();
}
}
} catch (e) {
if (!isOnLoadCall) {
Expand Down
Loading