diff --git a/lib/shared/src/configuration.ts b/lib/shared/src/configuration.ts
index b977e0d7c73b..131a0a43ffa3 100644
--- a/lib/shared/src/configuration.ts
+++ b/lib/shared/src/configuration.ts
@@ -37,6 +37,7 @@ export interface Configuration {
         | 'anthropic'
         | 'fireworks'
         | 'unstable-openai'
+        | 'experimental-openaicompatible'
         | 'experimental-ollama'
         | null
     autocompleteAdvancedModel: string | null
diff --git a/lib/shared/src/sourcegraph-api/completions/client.ts b/lib/shared/src/sourcegraph-api/completions/client.ts
index f19cdd4632e8..cb207b22cef5 100644
--- a/lib/shared/src/sourcegraph-api/completions/client.ts
+++ b/lib/shared/src/sourcegraph-api/completions/client.ts
@@ -94,8 +94,12 @@ export abstract class SourcegraphCompletionsClient {
         apiVersion: number,
         signal?: AbortSignal
     ): AsyncGenerator<CompletionGeneratorValue> {
-        // This is a technique to convert a function that takes callbacks to an async generator.
+        // Provide default stop sequence for starchat models.
+        if (!params.stopSequences && params?.model?.startsWith('openaicompatible/starchat')) {
+            params.stopSequences = ['<|end|>']
+        }
+
+        // This is a technique to convert a function that takes callbacks to an async generator.
         const values: Promise<CompletionGeneratorValue>[] = []
         let resolve: ((value: CompletionGeneratorValue) => void) | undefined
         values.push(
diff --git a/vscode/CHANGELOG.md b/vscode/CHANGELOG.md
index 199317c8e504..82f7ef4a5a21 100644
--- a/vscode/CHANGELOG.md
+++ b/vscode/CHANGELOG.md
@@ -6,6 +6,7 @@ This is a log of all notable changes to Cody for VS Code. [Unreleased] changes a
 
 ### Added
 
+- Cody Enterprise users now have access to an `experimental-openaicompatible` provider which allows bringing your own LLM via any OpenAI-compatible API. For now, this is only supported with Starchat and specific configurations - but we continue to generalize this work to support more models and OpenAI-compatible endpoints. [pull/3218](https://github.com/sourcegraph/cody/pull/3218)
 - Edit/Chat: Cody now expands the selection to the nearest enclosing function, if available, before attempting to expand to the nearest enclosing block. [pull/3507](https://github.com/sourcegraph/cody/pull/3507)
 - Edit: New `cody.edit.preInstruction` configuration option for adding custom instruction at the end of all your requests. [pull/3542](https://github.com/sourcegraph/cody/pull/3542)
 - Edit: Add support for the new `cody.edit.preInstruction` setting. [pull/3542](https://github.com/sourcegraph/cody/pull/3542)
diff --git a/vscode/package.json b/vscode/package.json
index 914f3abfb557..01d53e0e8f5e 100644
--- a/vscode/package.json
+++ b/vscode/package.json
@@ -970,7 +970,7 @@
         "cody.autocomplete.advanced.provider": {
           "type": "string",
           "default": null,
-          "enum": [null, "anthropic", "fireworks", "unstable-openai", "experimental-ollama"],
+          "enum": [null, "anthropic", "fireworks", "unstable-openai", "experimental-ollama", "experimental-openaicompatible"],
          "markdownDescription": "The provider used for code autocomplete. Most providers other than `anthropic` require the `cody.autocomplete.advanced.serverEndpoint` and `cody.autocomplete.advanced.accessToken` settings to also be set. Check the Cody output channel for error messages if autocomplete is not working as expected."
         },
         "cody.autocomplete.advanced.serverEndpoint": {
diff --git a/vscode/src/completions/providers/create-provider.test.ts b/vscode/src/completions/providers/create-provider.test.ts
index 9ea1193f254d..ad1d2bd5a292 100644
--- a/vscode/src/completions/providers/create-provider.test.ts
+++ b/vscode/src/completions/providers/create-provider.test.ts
@@ -87,6 +87,33 @@ describe('createProviderConfig', () => {
         expect(provider?.model).toBe('starcoder-hybrid')
     })
 
+    it('returns "experimental-openaicompatible" provider config and corresponding model if specified', async () => {
+        const provider = await createProviderConfig(
+            getVSCodeConfigurationWithAccessToken({
+                autocompleteAdvancedProvider: 'experimental-openaicompatible',
+                autocompleteAdvancedModel: 'starchat-16b-beta',
+            }),
+            dummyCodeCompletionsClient,
+            dummyAuthStatus
+        )
+        expect(provider?.identifier).toBe('experimental-openaicompatible')
+        expect(provider?.model).toBe('starchat-16b-beta')
+    })
+
+    it('returns "experimental-openaicompatible" provider config if specified in settings and default model', async () => {
+        const provider = await createProviderConfig(
+            getVSCodeConfigurationWithAccessToken({
+                autocompleteAdvancedProvider: 'experimental-openaicompatible',
+            }),
+            dummyCodeCompletionsClient,
+            dummyAuthStatus
+        )
+        expect(provider?.identifier).toBe('experimental-openaicompatible')
+        // TODO(slimsag): make this default to starchat2 once added
+        // specifically just when using `experimental-openaicompatible`
+        expect(provider?.model).toBe('starcoder-hybrid')
+    })
+
     it('returns "openai" provider config if specified in VSCode settings; model is ignored', async () => {
         const provider = await createProviderConfig(
             getVSCodeConfigurationWithAccessToken({
diff --git a/vscode/src/completions/providers/create-provider.ts b/vscode/src/completions/providers/create-provider.ts
index 798494a99c9f..7dabcb51ce8a 100644
--- a/vscode/src/completions/providers/create-provider.ts
+++ b/vscode/src/completions/providers/create-provider.ts
@@ -17,6 +17,7 @@ import {
     type FireworksOptions,
     createProviderConfig as createFireworksProviderConfig,
 } from './fireworks'
+import { createProviderConfig as createOpenAICompatibleProviderConfig } from './openaicompatible'
 import type { ProviderConfig } from './provider'
 import { createProviderConfig as createUnstableOpenAIProviderConfig } from './unstable-openai'
 
@@ -52,6 +53,15 @@ export async function createProviderConfig(
         case 'anthropic': {
             return createAnthropicProviderConfig({ client, model })
         }
+        case 'experimental-openaicompatible': {
+            return createOpenAICompatibleProviderConfig({
+                client,
+                model: config.autocompleteAdvancedModel ?? model ?? null,
+                timeouts: config.autocompleteTimeouts,
+                authStatus,
+                config,
+            })
+        }
         case 'experimental-ollama':
         case 'unstable-ollama': {
             return createExperimentalOllamaProviderConfig(
@@ -102,6 +112,14 @@ export async function createProviderConfig(
                 authStatus,
                 config,
             })
+        case 'experimental-openaicompatible':
+            return createOpenAICompatibleProviderConfig({
+                client,
+                timeouts: config.autocompleteTimeouts,
+                model: model ?? null,
+                authStatus,
+                config,
+            })
         case 'aws-bedrock':
         case 'anthropic':
             return createAnthropicProviderConfig({
diff --git a/vscode/src/completions/providers/openaicompatible.ts b/vscode/src/completions/providers/openaicompatible.ts
new file mode 100644
index 000000000000..021eeec87540
--- /dev/null
+++ b/vscode/src/completions/providers/openaicompatible.ts
@@ -0,0 +1,416 @@
+import * as vscode from 'vscode'
+
+import {
+    type AutocompleteTimeouts,
+    type CodeCompletionsClient,
+    type CodeCompletionsParams,
+    type CompletionResponseGenerator,
+    type ConfigurationWithAccessToken,
+    displayPath,
+    tokensToChars,
+} from '@sourcegraph/cody-shared'
+
+import { getLanguageConfig } from '../../tree-sitter/language'
+import { CLOSING_CODE_TAG, OPENING_CODE_TAG, getHeadAndTail } from '../text-processing'
+import type { ContextSnippet } from '../types'
+import { forkSignal, generatorWithTimeout, zipGenerators } from '../utils'
+
+import type { AuthStatus } from '@sourcegraph/cody-shared'
+
+import {
+    type FetchCompletionResult,
+    fetchAndProcessDynamicMultilineCompletions,
+} from './fetch-and-process-completions'
+import {
+    MAX_RESPONSE_TOKENS,
+    getCompletionParams,
+    getLineNumberDependentCompletionParams,
+} from './get-completion-params'
+import {
+    type CompletionProviderTracer,
+    Provider,
+    type ProviderConfig,
+    type ProviderOptions,
+    standardContextSizeHints,
+} from './provider'
+
+export interface OpenAICompatibleOptions {
+    model: OpenAICompatibleModel
+    maxContextTokens?: number
+    client: CodeCompletionsClient
+    timeouts: AutocompleteTimeouts
+    config: Pick<ConfigurationWithAccessToken, 'accessToken'>
+    authStatus: Pick<AuthStatus, 'userCanUpgrade' | 'isDotCom' | 'endpoint'>
+}
+
+const PROVIDER_IDENTIFIER = 'experimental-openaicompatible'
+
+const EOT_STARCHAT = '<|end|>'
+const EOT_STARCODER = '<|endoftext|>'
+const EOT_LLAMA_CODE = ' <EOT>'
+
+// Model identifiers (we are the source/definition for these in case of the openaicompatible provider.)
+const MODEL_MAP = {
+    starchat: 'openaicompatible/starchat-16b-beta',
+    'starchat-16b-beta': 'openaicompatible/starchat-16b-beta',
+
+    starcoder: 'openaicompatible/starcoder',
+    'starcoder-16b': 'openaicompatible/starcoder-16b',
+    'starcoder-7b': 'openaicompatible/starcoder-7b',
+    'llama-code-7b': 'openaicompatible/llama-code-7b',
+    'llama-code-13b': 'openaicompatible/llama-code-13b',
+    'llama-code-13b-instruct': 'openaicompatible/llama-code-13b-instruct',
+    'mistral-7b-instruct-4k': 'openaicompatible/mistral-7b-instruct-4k',
+}
+
+type OpenAICompatibleModel =
+    | keyof typeof MODEL_MAP
+    // `starcoder-hybrid` uses the 16b model for multiline requests and the 7b model for single line
+    | 'starcoder-hybrid'
+
+function getMaxContextTokens(model: OpenAICompatibleModel): number {
+    switch (model) {
+        case 'starchat':
+        case 'starchat-16b-beta':
+        case 'starcoder':
+        case 'starcoder-hybrid':
+        case 'starcoder-16b':
+        case 'starcoder-7b': {
+            // StarCoder and StarChat support up to 8k tokens, we limit to ~6k so we do not hit token limits.
+            return 8192 - 2048
+        }
+        case 'llama-code-7b':
+        case 'llama-code-13b':
+        case 'llama-code-13b-instruct':
+            // Llama Code was trained on 16k context windows, we're constraining it here to better
+            // compare the results
+            return 16384 - 2048
+        case 'mistral-7b-instruct-4k':
+            return 4096 - 2048
+        default:
+            return 1200
+    }
+}
+
+const lineNumberDependentCompletionParams = getLineNumberDependentCompletionParams({
+    singlelineStopSequences: ['\n'],
+    multilineStopSequences: ['\n\n', '\n\r\n'],
+})
+
+class OpenAICompatibleProvider extends Provider {
+    private model: OpenAICompatibleModel
+    private promptChars: number
+    private client: CodeCompletionsClient
+    private timeouts?: AutocompleteTimeouts
+
+    constructor(
+        options: ProviderOptions,
+        { model, maxContextTokens, client, timeouts }: Required<OpenAICompatibleOptions>
+    ) {
+        super(options)
+        this.timeouts = timeouts
+        this.model = model
+        this.promptChars = tokensToChars(maxContextTokens - MAX_RESPONSE_TOKENS)
+        this.client = client
+    }
+
+    private createPrompt(snippets: ContextSnippet[]): string {
+        const { prefix, suffix } = this.options.docContext
+
+        const intro: string[] = []
+        let prompt = ''
+
+        const languageConfig = getLanguageConfig(this.options.document.languageId)
+
+        // In StarCoder we have a special token to announce the path of the file
+        if (!isStarCoderFamily(this.model)) {
+            intro.push(`Path: ${this.options.document.fileName}`)
+        }
+
+        for (let snippetsToInclude = 0; snippetsToInclude < snippets.length + 1; snippetsToInclude++) {
+            if (snippetsToInclude > 0) {
+                const snippet = snippets[snippetsToInclude - 1]
+                if ('symbol' in snippet && snippet.symbol !== '') {
+                    intro.push(
+                        `Additional documentation for \`${snippet.symbol}\`:\n\n${snippet.content}`
+                    )
+                } else {
+                    intro.push(
+                        `Here is a reference snippet of code from ${displayPath(snippet.uri)}:\n\n${
+                            snippet.content
+                        }`
+                    )
+                }
+            }
+
+            const introString = `${intro
+                .join('\n\n')
+                .split('\n')
+                .map(line => (languageConfig ? languageConfig.commentStart + line : '// '))
+                .join('\n')}\n`
+
+            const suffixAfterFirstNewline = getSuffixAfterFirstNewline(suffix)
+
+            const nextPrompt = this.createInfillingPrompt(
+                vscode.workspace.asRelativePath(this.options.document.fileName),
+                introString,
+                prefix,
+                suffixAfterFirstNewline
+            )
+
+            if (nextPrompt.length >= this.promptChars) {
+                return prompt
+            }
+
+            prompt = nextPrompt
+        }
+
+        return prompt
+    }
+
+    public generateCompletions(
+        abortSignal: AbortSignal,
+        snippets: ContextSnippet[],
+        tracer?: CompletionProviderTracer
+    ): AsyncGenerator<FetchCompletionResult[]> {
+        const partialRequestParams = getCompletionParams({
+            providerOptions: this.options,
+            timeouts: this.timeouts,
+            lineNumberDependentCompletionParams,
+        })
+
+        // starchat: Only use infill if the suffix is not empty
+        const useInfill = this.options.docContext.suffix.trim().length > 0
+        const promptProps: Prompt = {
+            snippets: [],
+            uri: this.options.document.uri,
+            prefix: this.options.docContext.prefix,
+            suffix: this.options.docContext.suffix,
+            languageId: this.options.document.languageId,
+        }
+
+        const prompt = this.model.startsWith('starchat')
+            ? promptString(promptProps, useInfill, this.model)
+            : this.createPrompt(snippets)
+
+        const { multiline } = this.options
+        const requestParams: CodeCompletionsParams = {
+            ...partialRequestParams,
+            messages: [{ speaker: 'human', text: prompt }],
+            temperature: 0.2,
+            topK: 0,
+            model:
+                this.model === 'starcoder-hybrid'
+                    ? MODEL_MAP[multiline ? 'starcoder-16b' : 'starcoder-7b']
+                    : this.model.startsWith('starchat')
+                      ? '' // starchat is not a supported backend model yet, use the default server-chosen model.
+                      : MODEL_MAP[this.model],
+        }
+
+        tracer?.params(requestParams)
+
+        const completionsGenerators = Array.from({ length: this.options.n }).map(() => {
+            const abortController = forkSignal(abortSignal)
+
+            const completionResponseGenerator = generatorWithTimeout(
+                this.createDefaultClient(requestParams, abortController),
+                requestParams.timeoutMs,
+                abortController
+            )
+
+            return fetchAndProcessDynamicMultilineCompletions({
+                completionResponseGenerator,
+                abortController,
+                providerSpecificPostProcess: this.postProcess,
+                providerOptions: this.options,
+            })
+        })
+
+        /**
+         * This implementation waits for all generators to yield values
+         * before passing them to the consumer (request-manager). While this may appear
+         * as a performance bottleneck, it's necessary for the current design.
+         *
+         * The consumer operates on promises, allowing only a single resolve call
+         * from `requestManager.request`. Therefore, we must wait for the initial
+         * batch of completions before returning them collectively, ensuring all
+         * are included as suggested completions.
+         *
+         * To circumvent this performance issue, a method for adding completions to
+         * the existing suggestion list is needed. Presently, this feature is not
+         * available, and the switch to async generators maintains the same behavior
+         * as with promises.
+         */
+        return zipGenerators(completionsGenerators)
+    }
+
+    private createInfillingPrompt(
+        filename: string,
+        intro: string,
+        prefix: string,
+        suffix: string
+    ): string {
+        if (isStarCoderFamily(this.model) || isStarChatFamily(this.model)) {
+            // c.f. https://huggingface.co/bigcode/starcoder#fill-in-the-middle
+            // c.f. https://arxiv.org/pdf/2305.06161.pdf
+            return `<filename>${filename}${intro}<fim_prefix>${prefix}<fim_suffix>${suffix}<fim_middle>`
+        }
+        if (isLlamaCode(this.model)) {
+            // c.f. https://github.com/facebookresearch/codellama/blob/main/llama/generation.py#L402
+            return `<PRE> ${intro}${prefix} <SUF>${suffix} <MID>`
+        }
+        if (this.model === 'mistral-7b-instruct-4k') {
+            // This part is copied from the anthropic prompt but fitted into the Mistral instruction format
+            const relativeFilePath = vscode.workspace.asRelativePath(this.options.document.fileName)
+            const { head, tail } = getHeadAndTail(this.options.docContext.prefix)
+            const infillSuffix = this.options.docContext.suffix
+            const infillBlock = tail.trimmed.endsWith('{\n') ? tail.trimmed.trimEnd() : tail.trimmed
+            const infillPrefix = head.raw
+            return `[INST] Below is the code from file path ${relativeFilePath}. Review the code outside the XML tags to detect the functionality, formats, style, patterns, and logics in use. Then, use what you detect and reuse methods/libraries to complete and enclose completed code only inside XML tags precisely without duplicating existing implementations. Here is the code:
+\`\`\`
+${intro}${infillPrefix}${OPENING_CODE_TAG}${CLOSING_CODE_TAG}${infillSuffix}
+\`\`\`[/INST]
+ ${OPENING_CODE_TAG}${infillBlock}`
+        }
+
+        console.error('Could not generate infilling prompt for', this.model)
+        return `${intro}${prefix}`
+    }
+
+    private postProcess = (content: string): string => {
+        if (isStarCoderFamily(this.model)) {
+            return content.replace(EOT_STARCODER, '')
+        }
+        if (isStarChatFamily(this.model)) {
+            return content.replace(EOT_STARCHAT, '')
+        }
+        if (isLlamaCode(this.model)) {
+            return content.replace(EOT_LLAMA_CODE, '')
+        }
+        return content
+    }
+
+    private createDefaultClient(
+        requestParams: CodeCompletionsParams,
+        abortController: AbortController
+    ): CompletionResponseGenerator {
+        return this.client.complete(requestParams, abortController)
+    }
+}
+
+export function createProviderConfig({
+    model,
+    timeouts,
+    ...otherOptions
+}: Omit<OpenAICompatibleOptions, 'model' | 'maxContextTokens'> & {
+    model: string | null
+}): ProviderConfig {
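+    // Resolve the configured model name to a known model identifier; an empty or
+    // missing model falls back to `starcoder-hybrid`, and unknown names are rejected.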
+    const resolvedModel =
+        model === null || model === ''
+            ? 'starcoder-hybrid'
+            : model === 'starcoder-hybrid'
+              ? 'starcoder-hybrid'
+              : Object.prototype.hasOwnProperty.call(MODEL_MAP, model)
+                  ? (model as keyof typeof MODEL_MAP)
+                  : null
+
+    if (resolvedModel === null) {
+        throw new Error(`Unknown model: \`${model}\``)
+    }
+
+    const maxContextTokens = getMaxContextTokens(resolvedModel)
+
+    return {
+        create(options: ProviderOptions) {
+            return new OpenAICompatibleProvider(
+                {
+                    ...options,
+                    id: PROVIDER_IDENTIFIER,
+                },
+                {
+                    model: resolvedModel,
+                    maxContextTokens,
+                    timeouts,
+                    ...otherOptions,
+                }
+            )
+        },
+        contextSizeHints: standardContextSizeHints(maxContextTokens),
+        identifier: PROVIDER_IDENTIFIER,
+        model: resolvedModel,
+    }
+}
+
+// We want to remove the same line suffix from a completion request since both StarCoder and Llama
+// code can't handle this correctly.
+function getSuffixAfterFirstNewline(suffix: string): string {
+    const firstNlInSuffix = suffix.indexOf('\n')
+
+    // When there is no next line, the suffix should be empty
+    if (firstNlInSuffix === -1) {
+        return ''
+    }
+
+    return suffix.slice(suffix.indexOf('\n'))
+}
+
+function isStarChatFamily(model: string): boolean {
+    return model.startsWith('starchat')
+}
+
+function isStarCoderFamily(model: string): boolean {
+    return model.startsWith('starcoder')
+}
+
+function isLlamaCode(model: string): boolean {
+    return model.startsWith('llama-code')
+}
+
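+// Inputs used to build the prompt for starchat-style models (see promptString below).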
+interface Prompt {
+    snippets: { uri: vscode.Uri; content: string }[]
+
+    uri: vscode.Uri
+    prefix: string
+    suffix: string
+
+    languageId: string
+}
+
+function fileNameLine(uri: vscode.Uri, commentStart: string): string {
+    return `${commentStart} Path: ${displayPath(uri)}\n`
+}
+
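+// Builds the prompt for starchat-style models: context snippets and the current file
+// path are rendered as comment lines, followed by the file prefix. When infilling is
+// requested for a `codellama:` model, the prefix and suffix are wrapped in Code Llama's
+// infill format instead.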
+function promptString(prompt: Prompt, infill: boolean, model: string): string {
+    const config = getLanguageConfig(prompt.languageId)
+    const commentStart = config?.commentStart || '//'
+
+    const context = prompt.snippets
+        .map(
+            ({ uri, content }) =>
+                fileNameLine(uri, commentStart) +
+                content
+                    .split('\n')
+                    .map(line => `${commentStart} ${line}`)
+                    .join('\n')
+        )
+        .join('\n\n')
+
+    const currentFileNameComment = fileNameLine(prompt.uri, commentStart)
+
+    if (model.startsWith('codellama:') && infill) {
+        const infillPrefix = context + currentFileNameComment + prompt.prefix
+
+        /**
+         * The infill prompt for Code Llama.
+         * Source: https://github.com/facebookresearch/codellama/blob/e66609cfbd73503ef25e597fd82c59084836155d/llama/generation.py#L418
+         *
+         * Why are there spaces left and right?
+         * > For instance, the model expects this format: `<PRE> {pre} <SUF>{suf} <MID>`.
+         * But you won’t get infilling if the last space isn’t added such as in `<PRE> {pre} <SUF>{suf}<MID>`
+         *
+         * Source: https://blog.fireworks.ai/simplifying-code-infilling-with-code-llama-and-fireworks-ai-92c9bb06e29c
+         */
+        return `<PRE> ${infillPrefix} <SUF>${prompt.suffix} <MID>`
+    }
+
+    return context + currentFileNameComment + prompt.prefix
+}
diff --git a/vscode/src/configuration.ts b/vscode/src/configuration.ts
index 070b6e2ee2c1..384a8a2077a5 100644
--- a/vscode/src/configuration.ts
+++ b/vscode/src/configuration.ts
@@ -52,7 +52,10 @@ export function getConfiguration(
     }
 
     let autocompleteAdvancedProvider = config.get<
-        Configuration['autocompleteAdvancedProvider'] | 'unstable-ollama' | 'unstable-fireworks'
+        | Configuration['autocompleteAdvancedProvider']
+        | 'unstable-ollama'
+        | 'unstable-fireworks'
+        | 'experimental-openaicompatible'
     >(CONFIG_KEY.autocompleteAdvancedProvider, null)
 
     // Handle deprecated provider identifiers