diff --git a/completions-review-tool/data/starcoder-fireworks-1690539528108.json b/completions-review-tool/data/starcoder-fireworks-1690539528108.json new file mode 100644 index 000000000000..4f4f29a8a861 --- /dev/null +++ b/completions-review-tool/data/starcoder-fireworks-1690539528108.json @@ -0,0 +1,243 @@ +[ + { + "completions": [ + "signale.info(message)" + ], + "elapsed": 1362, + "timestamp": "1690539528108", + "sample": { + "context": [], + "fileName": "logger.ts", + "languageId": "typescript", + "content": "import signale from 'signale'\n\nfunction logMessage(message: string) {\n ️🔥\n}" + } + }, + { + "completions": [ + "const dateString = new Date().toUTCString() \n const filePath = path.join(__dirname, 'date.txt') \n fs.writeFileSync(filePath, dateString, 'utf8')", + "const date = new Date() \n const filePath = path.join(__dirname, 'date.txt')\n fs.writeFileSync(filePath, date.toString())", + "const date = new Date() \n const fileName = path.join(__dirname, 'date.txt')\n fs.writeFileSync(fileName, date.toString())" + ], + "elapsed": 1038, + "timestamp": "1690539528108", + "sample": { + "context": [], + "fileName": "writer.ts", + "languageId": "typescript", + "content": "import path from 'path'\n\nfunction writeDateToDisk() {\n ️🔥\n}" + } + }, + { + "completions": [ + "const lineNumber = position.line + 1\n return {\n text: this.lines[position.line],\n range: new vscode.Range(\n this.positionAt(position.line), \n this.positionAt(position.line + 1)\n )\n }", + "let lineNumber = position.line + 1\n return {\n text: this.lines[position.line],\n range: new vscode.Range(\n this.positionAt(position.line), \n this.positionAt(position.line + 1))\n }", + "const lineNumber = position.line + 1\n return {\n text: this.lines[position.line],\n lineNumber: lineNumber\n }" + ], + "elapsed": 1709, + "timestamp": "1690539528108", + "sample": { + "context": [], + "fileName": "text-document.ts", + "languageId": "typescript", + "content": "class TextDocument implements vscode.TextDocument {\n private text: string\n\n constructor(public uri: vscode.Uri, text: string) {\n this.text = text.replace(/\r\n/gm, '\n') // normalize end of line\n }\n\n private get lines(): string[] {\n return this.text.split('\n')\n }\n\n lineAt(position: number | vscode.Position): vscode.TextLine {\n ️🔥\n }\n}" + } + }, + { + "completions": [ + "try {\n return execFileSync('uname', ['-s']).toString().trim()\n } catch (err) {\n return null;\n } \n }\n return null;\n }", + "try {\n return execFileSync('uname').toString().trim()\n } catch (err) {\n return null;\n }\n } else {\n return null;\n }", + "try {\n const {platform} = require('os');\n return platform; \n } catch (err) {\n return null; \n }\n }" + ], + "elapsed": 2064, + "timestamp": "1690539528108", + "sample": { + "context": [], + "fileName": "getOs.ts", + "languageId": "typescript", + "content": "import { execFileSync } from 'child_process'\n\nfunction getOSName(): string | null {\n if (typeof window === 'undefined') {\n ️🔥\n}" + } + }, + { + "completions": [ + "Media && window.matchMedia('(prefers-color-scheme: dark)').matches;" + ], + "elapsed": 689, + "timestamp": "1690539528108", + "sample": { + "context": [], + "fileName": "isDarkColorScheme.ts", + "languageId": "typescript", + "content": "function isDarkColorScheme(): boolean {\n return window.match️🔥\n}" + } + }, + { + "completions": [ + " === 'localhost' || window.location.host === '127.0.0.1';" + ], + "elapsed": 727, + "timestamp": "1690539528108", + "sample": { + "context": [], + "fileName": "isLocalhost.ts", + "languageId": "typescript", + "content": "function isLocalhost(): boolean {\n return window.location.host️🔥\n}" + } + }, + { + "completions": [ + "return text.toLowerCase().replace(/[^a-z0-9]/g, \"-\");", + "return text.toLowerCase().replace(/\\s/g, \"-\");" + ], + "elapsed": 1298, + "timestamp": "1690539528108", + "sample": { + "context": [ + { + "fileName": "lib/parser/excerpt.ts", + "content": "export function excerpt(text: string, limit: number = 144) {\n let result = \"\";\n\n for (const word of text.split(\" \")) {\n if (result.length + word.length + 1 <= limit) {\n result += \" \" + word;\n } else {\n // Fix trailing comma. Might need a more generic solution at some point :D\n if (result.endsWith(\",\")) {\n result = result.slice(0, -1);\n }\n result += \"…\";\n break;\n }\n }\n\n return result;\n}\n" + }, + { + "fileName": "lib/parser/post.ts", + "content": "\nimport format from \"date-fns/format\";\nimport fs from \"fs/promises\";\nimport path from \"path\";\nimport { parseMarkdown } from \"./markdown\";\n\ninterface ExternalPost {\n type: \"external\";\n id: string;\n title: string;\n excerpt: string;\n formattedDate: string;\n readingTime: string;\n contentHtml: string;\n external: string;\n date: string;\n}\nexport interface BlogPost {\n type: \"blog\";\n id: string;\n title: string;\n excerpt: string;\n formattedDate: string;\n readingTime: string;\n contentHtml: string;\n date: string;\n}\n\nexport type Post = ExternalPost | BlogPost;\n\nexport async function getPost(id: string): Promise {\n // Read markdown file as string\n const fullPath = path.join(postsDirectory, id + \".md\");\n const fileContents = await fs.readFile(fullPath, \"utf8\");\n\n const { data, readingTime, contentHtml, excerpt } = await parseMarkdown(\n fileContents\n );\n\n return {\n id,\n ...data,\n type: data.external ? \"external\" : \"blog\",\n excerpt,\n formattedDate: format(new Date(data.date), \"LLLL d, Y\"),\n readingTime,\n contentHtml,\n } as Post;\n}\n" + }, + { + "fileName": "lib/parser/posts.ts", + "content": "import fs from \"fs/promises\";\nimport path from \"path\";\nimport { getPost, Post } from \"./post\";\n\nexport const postsDirectory = path.join(process.cwd(), \"posts\");\n\nexport async function getPosts(): Promise {\n // Get file names under /posts\n const dirs = await fs.readdir(postsDirectory);\n\n let allPostsData: Post[] = [];\n for (const fileName of dirs) {\n if (fileName.indexOf(\".md\") === -1) {\n continue;\n }\n const stat = await fs.stat(path.join(postsDirectory, fileName));\n if (stat.isDirectory()) {\n continue;\n }\n\n // Remove \".md\" from file name to get the page slug\n const id = fileName.replace(/\\.md$/, \"\");\n\n allPostsData.push(await getPost(id));\n }\n\n // Sort posts by date\n return allPostsData.sort(({ date: a }, { date: b }) => {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n}\n" + }, + { + "fileName": "lib/parser/markdown.ts", + "content": "import { remark } from \"remark\";\n\nimport html from \"remark-html\";\nimport prism from \"remark-prism\";\nimport matter from \"gray-matter\";\nimport remarkFootnotes from \"remark-footnotes\";\nimport { excerpt } from \"./excerpt\";\nimport readingTime from \"reading-time\";\n\nexport async function parseMarkdown(markdown: string): Promise<{\n data: any;\n excerpt: string;\n contentHtml: string;\n readingTime: string;\n}> {\n // Use gray-matter to parse the post metadata section\n const matterResult = matter(markdown);\n let { content, data } = matterResult;\n\n content = content\n .replaceAll(/\\[x\\]/g, \"\")\n .replaceAll(/\\[.?\\]/g, \"\");\n\n const processedContent = await remark()\n .use(html, { sanitize: false })\n .use(prism)\n .use(remarkFootnotes)\n .process(content);\n const contentHtml = processedContent.toString();\n\n return {\n data,\n contentHtml,\n excerpt: excerpt(content),\n readingTime: readingTime(content).text,\n };\n}\n" + } + ], + "fileName": "lib/parser/notes.ts", + "languageId": "typescript", + "content": "import format from \"date-fns/format\";\nimport { parseMarkdown } from \"./markdown\";\n\nexport interface Note {\n title: string;\n id: string;\n formattedDate: string;\n date: string;\n category: string[];\n contentHtml: string;\n}\n\nconst TOKEN = process.env.GITHUB_TOKEN;\nconst GRAPHQL_URL = \"https://api.github.com/graphql\";\nconst HIDDEN_FILES = new Set([\"README.md\"]);\nconst HIDDEN_DIRS = new Set([\"Unlisted\"]);\n\n// No-op, used only for syntax highlighting in the IDE\nfunction gql(strings: TemplateStringsArray) {\n return strings.raw.join(\"\");\n}\n\nconst headers = {\n Authorization: `Bearer ${TOKEN}`,\n};\n\nconst CONTENTS_QUERY = gql`\n{\n repository(name: \"philipp-spiess\", owner: \"philipp-spiess\") {\n ref(qualifiedName: \"main\") {\n target {\n ... on Commit {\n tree {\n entries {\n ...MyTreeEntry\n object {\n ... on Tree {\n entries {\n ...MyTreeEntry\n object {\n ... on Tree {\n entries {\n ...MyTreeEntry\n }\n }\n }\n }\n }\n }\n }\n }\n }\n }\n }\n }\n}\n\nfragment MyTreeEntry on TreeEntry {\n path\n type\n blob: object {\n ... on Blob {\n text\n }\n }\n}\n`;\n\nexport async function getNotes(): Promise {\n let notes: Note[] = [];\n const rawNotes = (await fetchNotes()) as any;\n\n for (const rawNote of rawNotes) {\n const { data, contentHtml } = await parseMarkdown(rawNote.content);\n\n const date = data.date instanceof Date ? data.date.toISOString() : null;\n\n notes.push({\n title: rawNote.path.split(\"/\").pop().replace(\".md\", \"\"),\n id: getId(rawNote.path),\n date,\n formattedDate: format(new Date(date), \"LLLL d, Y\"),\n category: rawNote.path.split(\"/\").slice(0, -1),\n contentHtml,\n });\n }\n\n // Sort posts by date\n return notes.sort(({ date: a }, { date: b }) => {\n if (a < b) {\n return 1;\n } else if (a > b) {\n return -1;\n } else {\n return 0;\n }\n });\n}\n\ninterface RawNote {\n path: string;\n content: string;\n}\nasync function fetchNotes(dir: string = \"\"): Promise {\n const res = await fetch(GRAPHQL_URL, {\n method: \"POST\",\n headers,\n body: JSON.stringify({ query: CONTENTS_QUERY }),\n }).then((r) => r.json());\n\n return recursivelyResolveEntries(res.data.repository.ref.target.tree);\n}\n\ninterface GitHubTree {\n entries: Array<\n | {\n path: string;\n type: \"blob\";\n blob: {\n text: string;\n };\n }\n | {\n path: string;\n type: \"tree\";\n object: GitHubTree;\n }\n >;\n}\nfunction recursivelyResolveEntries(tree: GitHubTree): RawNote[] {\n let result: RawNote[] = [];\n for (let entry of tree.entries) {\n if (entry.type == \"blob\") {\n if (!entry.path.endsWith(\".md\") || HIDDEN_FILES.has(entry.path)) {\n continue;\n }\n\n result.push({\n path: entry.path,\n content: entry.blob.text,\n });\n } else {\n if (HIDDEN_DIRS.has(entry.path)) {\n continue;\n }\n\n result = result.concat(recursivelyResolveEntries(entry.object));\n }\n }\n return result;\n}\n\nfunction getId(text: string): string {\n return text.replace(\".md\", \"\").split(\"/\").map(getSlug).join(\"/\");\n}\n\nfunction getSlug(text: string): string {\n ️🔥\n}" + } + }, + { + "completions": [ + "responseBody := linesToResponse([]string{\n \"import foo\",\n \"foo.bar(\",\n })\n\n client := getMockClient(responseBody)\n\n var events []types.CompletionEvent\n assert.NoError(t, client.Stream(context.Background(), types.CompletionFeatureCode, types.CompletionRequestParameters{}, func(event types.CompletionEvent) {\n events = append(events, event)\n }))\n\n assert.Len(t, events, 2)\n assert.Equal(t, \"import foo\", events[0].Completion)\n assert.Equal(t, \"foo.bar(\", events[1].Completion)\n}", + "responseBody := linesToResponse([]string{\"completion1\", \"completion2\"})\n client := getMockClient(responseBody)\n\n var events []types.CompletionEvent\n err := client.Stream(context.Background(), types.CompletionFeatureCode, types.CompletionRequestParameters{}, \n func(event types.CompletionEvent) {\n events = append(events, event)\n })\n assert.NoError(t, err)\n assert.Len(t, events, 2)\n assert.Equal(t, \"completion1\", events[0].Completion)\n assert.Equal(t, \"completion2\", events[1].Completion)\n}", + "responseBody := linesToResponse([]string{\"completion1\", \"completion2\", \"completion3\"})\n\n client := getMockClient(responseBody)\n\n var completions []string\n err := client.Stream(context.Background(), types.CompletionsFeatureLSP, types.CompletionRequestParameters{}, \n func(event types.CompletionEvent) {\n completions = append(completions, event.Completion)\n })\n\n require.NoError(t, err)\n assert.Equal(t, []string{\"completion1\", \"completion2\", \"completion3\"}, completions)\n}" + ], + "elapsed": 4220, + "timestamp": "1690539528108", + "sample": { + "context": [ + { + "fileName": "internal/completions/httpapi/handler.go", + "content": "package httpapi\n\nimport (\n\t\"context\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"net/http\"\n\t\"strconv\"\n\t\"time\"\n\n\t\"github.com/sourcegraph/sourcegraph/internal/cody\"\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/client\"\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/types\"\n\t\"github.com/sourcegraph/sourcegraph/internal/conf\"\n\t\"github.com/sourcegraph/sourcegraph/internal/conf/conftypes\"\n)\n\n// maxRequestDuration is the maximum amount of time a request can take before\n// being cancelled.\nconst maxRequestDuration = time.Minute\n\nfunc newCompletionsHandler(\n\trl RateLimiter,\n\ttraceFamily string,\n\tgetModel func(types.CodyCompletionRequestParameters, *conftypes.CompletionsConfig) string,\n\thandle func(context.Context, types.CompletionRequestParameters, types.CompletionsClient, http.ResponseWriter),\n) http.Handler {\n\treturn http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {\n\t\tif r.Method != \"POST\" {\n\t\t\thttp.Error(w, fmt.Sprintf(\"unsupported method %s\", r.Method), http.StatusMethodNotAllowed)\n\t\t\treturn\n\t\t}\n\n\t\tctx, cancel := context.WithTimeout(r.Context(), maxRequestDuration)\n\t\tdefer cancel()\n\n\t\tif isEnabled := cody.IsCodyEnabled(ctx); !isEnabled {\n\t\t\thttp.Error(w, \"cody experimental feature flag is not enabled for current user\", http.StatusUnauthorized)\n\t\t\treturn\n\t\t}\n\n\t\tcompletionsConfig := conf.GetCompletionsConfig(conf.Get().SiteConfig())\n\t\tif completionsConfig == nil {\n\t\t\thttp.Error(w, \"completions are not configured or disabled\", http.StatusInternalServerError)\n\t\t}\n\n\t\tvar requestParams types.CodyCompletionRequestParameters\n\t\tif err := json.NewDecoder(r.Body).Decode(&requestParams); err != nil {\n\t\t\thttp.Error(w, \"could not decode request body\", http.StatusBadRequest)\n\t\t\treturn" + }, + { + "fileName": "internal/completions/client/anthropic/anthropic.go", + "content": "package anthropic\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"encoding/json\"\n\t\"net/http\"\n\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/types\"\n\t\"github.com/sourcegraph/sourcegraph/internal/httpcli\"\n\t\"github.com/sourcegraph/sourcegraph/lib/errors\"\n)\n\nfunc NewClient(cli httpcli.Doer, apiURL, accessToken string) types.CompletionsClient {\n\treturn &anthropicClient{\n\t\tcli: cli,\n\t\taccessToken: accessToken,\n\t\tapiURL: apiURL,\n\t}\n}\n\nconst (\n\tclientID = \"sourcegraph/1.0\"\n)\n\ntype anthropicClient struct {\n\tcli httpcli.Doer\n\taccessToken string\n\tapiURL string\n}\n\nfunc (a *anthropicClient) Complete(\n\tctx context.Context,\n\tfeature types.CompletionsFeature,\n\trequestParams types.CompletionRequestParameters,\n) (*types.CompletionResponse, error) {\n\tresp, err := a.makeRequest(ctx, requestParams, false)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\tdefer resp.Body.Close()\n\n\tvar response anthropicCompletionResponse\n\tif err := json.NewDecoder(resp.Body).Decode(&response); err != nil {\n\t\treturn nil, err\n\t}\n\treturn &types.CompletionResponse{\n\t\tCompletion: response.Completion,\n\t\tStopReason: response.StopReason,\n\t}, nil" + }, + { + "fileName": "internal/completions/client/codygateway/codygateway.go", + "content": "package codygateway\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"net/http\"\n\t\"net/url\"\n\t\"strings\"\n\n\t\"go.opentelemetry.io/otel/attribute\"\n\t\"go.opentelemetry.io/otel/trace\"\n\n\t\"github.com/sourcegraph/sourcegraph/internal/codygateway\"\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/client/anthropic\"\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/client/openai\"\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/types\"\n\t\"github.com/sourcegraph/sourcegraph/internal/conf/conftypes\"\n\t\"github.com/sourcegraph/sourcegraph/internal/httpcli\"\n\t\"github.com/sourcegraph/sourcegraph/lib/errors\"\n)\n\n// NewClient instantiates a completions provider backed by Sourcegraph's managed\n// Cody Gateway service.\nfunc NewClient(cli httpcli.Doer, endpoint, accessToken string) (types.CompletionsClient, error) {\n\tgatewayURL, err := url.Parse(endpoint)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\treturn &codyGatewayClient{\n\t\tupstream: cli,\n\t\tgatewayURL: gatewayURL,\n\t\taccessToken: accessToken,\n\t}, nil\n}\n\ntype codyGatewayClient struct {\n\tupstream httpcli.Doer\n\tgatewayURL *url.URL\n\taccessToken string\n}\n\nfunc (c *codyGatewayClient) Stream(ctx context.Context, feature types.CompletionsFeature, requestParams types.CompletionRequestParameters, sendEvent types.SendCompletionEvent) error {\n\tcc, err := c.clientForParams(feature, &requestParams)\n\tif err != nil {\n\t\treturn err\n\t}\n\treturn overwriteErrSource(cc.Stream(ctx, feature, requestParams, sendEvent))\n}\n\nfunc (c *codyGatewayClient) Complete(ctx context.Context, feature types.CompletionsFeature, requestParams types.CompletionRequestParameters) (*types.CompletionResponse, error) {" + }, + { + "fileName": "internal/completions/httpapi/codecompletion.go", + "content": "\nimport (\n\t\"context\"\n\t\"encoding/json\"\n\t\"net/http\"\n\n\t\"github.com/sourcegraph/log\"\n\n\t\"github.com/sourcegraph/sourcegraph/internal/completions/types\"\n\t\"github.com/sourcegraph/sourcegraph/internal/conf/conftypes\"\n\t\"github.com/sourcegraph/sourcegraph/internal/database\"\n\t\"github.com/sourcegraph/sourcegraph/internal/redispool\"\n\t\"github.com/sourcegraph/sourcegraph/internal/trace\"\n)\n\n// NewCodeCompletionsHandler is an http handler which sends back code completion results\nfunc NewCodeCompletionsHandler(logger log.Logger, db database.DB) http.Handler {\n\tlogger = logger.Scoped(\"code\", \"code completions handler\")\n\n\trl := NewRateLimiter(db, redispool.Store, types.CompletionsFeatureCode)\n\treturn newCompletionsHandler(rl, \"code\", func(requestParams types.CodyCompletionRequestParameters, c *conftypes.CompletionsConfig) string {\n\t\t// No user defined models for now.\n\t\t// TODO(eseliger): Look into reviving this, but it was unused so far.\n\t\treturn c.CompletionModel\n\t}, func(ctx context.Context, requestParams types.CompletionRequestParameters, cc types.CompletionsClient, w http.ResponseWriter) {\n\t\tcompletion, err := cc.Complete(ctx, types.CompletionsFeatureCode, requestParams)\n\t\tif err != nil {\n\t\t\tlogFields := []log.Field{log.Error(err)}\n\n\t\t\t// Propagate the upstream headers to the client if available.\n\t\t\tif errNotOK, ok := types.IsErrStatusNotOK(err); ok {\n\t\t\t\terrNotOK.WriteHeader(w)\n\t\t\t\tif tc := errNotOK.SourceTraceContext; tc != nil {\n\t\t\t\t\tlogFields = append(logFields,\n\t\t\t\t\t\tlog.String(\"sourceTraceContext.traceID\", tc.TraceID),\n\t\t\t\t\t\tlog.String(\"sourceTraceContext.spanID\", tc.SpanID))\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tw.WriteHeader(http.StatusInternalServerError)\n\t\t\t}\n\t\t\t_, _ = w.Write([]byte(err.Error()))\n\n\t\t\ttrace.Logger(ctx, logger).Error(\"error on completion\", logFields...)\n\t\t\treturn\n\t\t}\n\n\t\tcompletionBytes, err := json.Marshal(completion)\n\t\tif err != nil {\n\t\t\thttp.Error(w, err.Error(), http.StatusInternalServerError)\n\t\t\treturn" + } + ], + "fileName": "internal/completions/client/anthropic/anthropic_test.go", + "languageId": "go", + "content": "package anthropic\n\nimport (\n \"bytes\"\n \"context\"\n \"fmt\"\n \"io\"\n \"net/http\"\n \"testing\"\n\n \"github.com/hexops/autogold/v2\"\n \"github.com/stretchr/testify/assert\"\n \"github.com/stretchr/testify/require\"\n\n \"github.com/sourcegraph/sourcegraph/internal/completions/types\"\n)\n\ntype mockDoer struct {\n do func(*http.Request) (*http.Response, error)\n}\n\nfunc (c *mockDoer) Do(r *http.Request) (*http.Response, error) {\n return c.do(r)\n}\n\nfunc linesToResponse(lines []string) []byte {\n responseBytes := []byte{}\n for _, line := range lines {\n responseBytes = append(responseBytes, []byte(fmt.Sprintf(\"data: %s\", line))...)\n responseBytes = append(responseBytes, []byte(\"\\r\n\\r\n\")...)\n }\n return responseBytes\n}\n\nfunc getMockClient(responseBody []byte) types.CompletionsClient {\n return NewClient(&mockDoer{\n func(r *http.Request) (*http.Response, error) {\n return &http.Response{StatusCode: http.StatusOK, Body: io.NopCloser(bytes.NewReader(responseBody))}, nil\n },\n }, \"\", \"\")\n}\n\nfunc TestValidAnthropicStream(t *testing.T) {\n ️🔥\nfunc TestInvalidAnthropicStream(t *testing.T) {\n var mockAnthropicInvalidResponseLines = []string{`{]`}\n\n mockClient := getMockClient(linesToResponse(mockAnthropicInvalidResponseLines))\n err := mockClient.Stream(context.Background(), types.CompletionsFeatureChat, types.CompletionRequestParameters{}, func(event types.CompletionResponse) error { return nil })\n if err == nil {\n t.Fatal(\"expected error, got nil\")\n }\n assert.Contains(t, err.Error(), \"failed to decode event payload\")\n}\n\nfunc TestErrStatusNotOK(t *testing.T) {\n mockClient := NewClient(&mockDoer{\n func(r *http.Request) (*http.Response, error) {\n return &http.Response{\n StatusCode: http.StatusTooManyRequests," + } + }, + { + "completions": [ + "\"\"\"Test getting open issues.\"\"\"\n issues = api_client.get_open_issues()\n assert isinstance(issues, list)\n assert len(issues) > 0", + "\"\"\"Test getting open issues.\"\"\"\n issues = api_client.get_open_issues()\n assert issues\n assert isinstance(issues, list)", + "\"\"\"Test getting open issues.\"\"\"\n issues = api_client.get_open_issues()\n assert len(issues) > 0" + ], + "elapsed": 3660, + "timestamp": "1690539528108", + "sample": { + "context": [ + { + "fileName": "libs/langchain/langchain/utilities/github.py", + "content": "\"\"\"Util that calls GitHub.\"\"\"\nimport json\nfrom typing import Any, Dict, List, Optional\n\nfrom github.Issue import Issue\nfrom pydantic import BaseModel, Extra, root_validator\n\nfrom langchain.utils import get_from_dict_or_env\n\n\nclass GitHubAPIWrapper(BaseModel):\n \"\"\"Wrapper for GitHub API.\"\"\"\n\n github: Any #: :meta private:\n github_repo_instance: Any #: :meta private:\n github_repository: Optional[str] = None\n github_app_id: Optional[str] = None\n github_app_private_key: Optional[str] = None\n github_branch: Optional[str] = None\n\n class Config:\n \"\"\"Configuration for this pydantic object.\"\"\"\n\n extra = Extra.forbid\n\n @root_validator()\n def validate_environment(cls, values: Dict) -> Dict:\n \"\"\"Validate that api key and python package exists in environment.\"\"\"\n github_repository = get_from_dict_or_env(\n values, \"github_repository\", \"GITHUB_REPOSITORY\"\n )\n\n github_app_id = get_from_dict_or_env(values, \"github_app_id\", \"GITHUB_APP_ID\")\n\n github_app_private_key = get_from_dict_or_env(\n values, \"github_app_private_key\", \"GITHUB_APP_PRIVATE_KEY\"\n )\n\n github_branch = get_from_dict_or_env(\n values, \"github_branch\", \"GITHUB_BRANCH\", default=\"master\"\n )\n\n try:\n from github import Auth, GithubIntegration\n\n except ImportError:\n raise ImportError(\n \"PyGithub is not installed. \"\n \"Please install it with `pip install PyGithub`\"\n )" + }, + { + "fileName": "libs/langchain/langchain/agents/agent_toolkits/github/toolkit.py", + "content": "\"\"\"GitHub Toolkit.\"\"\"\nfrom typing import Dict, List\n\nfrom langchain.agents.agent_toolkits.base import BaseToolkit\nfrom langchain.tools import BaseTool\nfrom langchain.tools.github.prompt import (\n COMMENT_ON_ISSUE_PROMPT,\n CREATE_FILE_PROMPT,\n DELETE_FILE_PROMPT,\n GET_ISSUE_PROMPT,\n GET_ISSUES_PROMPT,\n READ_FILE_PROMPT,\n UPDATE_FILE_PROMPT,\n)\nfrom langchain.tools.github.tool import GitHubAction\nfrom langchain.utilities.github import GitHubAPIWrapper\n\n\nclass GitHubToolkit(BaseToolkit):\n \"\"\"GitHub Toolkit.\"\"\"\n\n tools: List[BaseTool] = []\n\n @classmethod\n def from_github_api_wrapper(\n cls, github_api_wrapper: GitHubAPIWrapper\n ) -> \"GitHubToolkit\":\n operations: List[Dict] = [\n {\n \"mode\": \"get_issues\",\n \"name\": \"Get Issues\",\n \"description\": GET_ISSUES_PROMPT,\n },\n {\n \"mode\": \"get_issue\",\n \"name\": \"Get Issue\",\n \"description\": GET_ISSUE_PROMPT,\n },\n {\n \"mode\": \"comment_on_issue\",\n \"name\": \"Comment on Issue\",\n \"description\": COMMENT_ON_ISSUE_PROMPT,\n },\n {\n \"mode\": \"create_file\",\n \"name\": \"Create File\",\n \"description\": CREATE_FILE_PROMPT,\n },\n {\n \"mode\": \"read_file\"," + }, + { + "fileName": "libs/langchain/langchain/agents/chat/base.py", + "content": "from typing import Any, List, Optional, Sequence, Tuple\n\nfrom pydantic import Field\n\nfrom langchain.agents.agent import Agent, AgentOutputParser\nfrom langchain.agents.chat.output_parser import ChatOutputParser\nfrom langchain.agents.chat.prompt import (\n FORMAT_INSTRUCTIONS,\n HUMAN_MESSAGE,\n SYSTEM_MESSAGE_PREFIX,\n SYSTEM_MESSAGE_SUFFIX,\n)\nfrom langchain.agents.utils import validate_tools_single_input\nfrom langchain.callbacks.base import BaseCallbackManager\nfrom langchain.chains.llm import LLMChain\nfrom langchain.prompts.chat import (\n ChatPromptTemplate,\n HumanMessagePromptTemplate,\n SystemMessagePromptTemplate,\n)\nfrom langchain.schema import AgentAction, BasePromptTemplate\nfrom langchain.schema.language_model import BaseLanguageModel\nfrom langchain.tools.base import BaseTool\n\n\nclass ChatAgent(Agent):\n \"\"\"Chat Agent.\"\"\"\n\n output_parser: AgentOutputParser = Field(default_factory=ChatOutputParser)\n \"\"\"Output parser for the agent.\"\"\"\n\n @property\n def observation_prefix(self) -> str:\n \"\"\"Prefix to append the observation with.\"\"\"\n return \"Observation: \"\n\n @property\n def llm_prefix(self) -> str:\n \"\"\"Prefix to append the llm call with.\"\"\"\n return \"Thought:\"\n\n def _construct_scratchpad(\n self, intermediate_steps: List[Tuple[AgentAction, str]]\n ) -> str:\n agent_scratchpad = super()._construct_scratchpad(intermediate_steps)\n if not isinstance(agent_scratchpad, str):\n raise ValueError(\"agent_scratchpad should be of type string.\")\n if agent_scratchpad:\n return (\n f\"This was your previous work \"" + }, + { + "fileName": "libs/langchain/langchain/agents/conversational/base.py", + "content": "from __future__ import annotations\n\nfrom typing import Any, List, Optional, Sequence\n\nfrom pydantic import Field\n\nfrom langchain.agents.agent import Agent, AgentOutputParser\nfrom langchain.agents.agent_types import AgentType\nfrom langchain.agents.conversational.output_parser import ConvoOutputParser\nfrom langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX\nfrom langchain.agents.utils import validate_tools_single_input\nfrom langchain.callbacks.base import BaseCallbackManager\nfrom langchain.chains import LLMChain\nfrom langchain.prompts import PromptTemplate\nfrom langchain.schema.language_model import BaseLanguageModel\nfrom langchain.tools.base import BaseTool\n\n\nclass ConversationalAgent(Agent):\n \"\"\"An agent that holds a conversation in addition to using tools.\"\"\"\n\n ai_prefix: str = \"AI\"\n \"\"\"Prefix to use before AI output.\"\"\"\n output_parser: AgentOutputParser = Field(default_factory=ConvoOutputParser)\n \"\"\"Output parser for the agent.\"\"\"\n\n @classmethod\n def _get_default_output_parser(\n cls, ai_prefix: str = \"AI\", **kwargs: Any\n ) -> AgentOutputParser:\n return ConvoOutputParser(ai_prefix=ai_prefix)\n\n @property\n def _agent_type(self) -> str:\n \"\"\"Return Identifier of agent type.\"\"\"\n return AgentType.CONVERSATIONAL_REACT_DESCRIPTION\n\n @property\n def observation_prefix(self) -> str:\n \"\"\"Prefix to append the observation with.\"\"\"\n return \"Observation: \"\n\n @property\n def llm_prefix(self) -> str:\n \"\"\"Prefix to append the llm call with.\"\"\"\n return \"Thought:\"\n\n @classmethod\n def create_prompt(\n cls," + }, + { + "fileName": "libs/langchain/langchain/agents/agent_toolkits/github/__init__.py", + "content": "\"\"\"GitHub Toolkit.\"\"\"\n" + } + ], + "fileName": "libs/langchain/tests/integration_tests/utilities/test_github.py", + "languageId": "python", + "content": "\"\"\"Integration test for Github Wrapper.\"\"\"\nimport pytest\n\nfrom langchain.utilities.github import GitHubAPIWrapper\n\n# Make sure you have set the following env variables:\n# GITHUB_REPOSITORY\n# GITHUB_BRANCH\n# GITHUB_APP_ID\n# GITHUB_PRIVATE_KEY\n\n\n@pytest.fixture\ndef api_client() -> GitHubAPIWrapper:\n return GitHubAPIWrapper()\n\n\ndef test_get_open_issues(api_client: GitHubAPIWrapper) -> None:\n ️🔥" + } + }, + { + "completions": [ + "from_toolkit(self)" + ], + "elapsed": 941, + "timestamp": "1690539528108", + "sample": { + "context": [ + { + "fileName": "libs/langchain/langchain/tools/playwright/click.py", + "content": "from __future__ import annotations\n\nfrom typing import Optional, Type\n\nfrom pydantic import BaseModel, Field\n\nfrom langchain.callbacks.manager import (\n AsyncCallbackManagerForToolRun,\n CallbackManagerForToolRun,\n)\nfrom langchain.tools.playwright.base import BaseBrowserTool\nfrom langchain.tools.playwright.utils import (\n aget_current_page,\n get_current_page,\n)\n\n\nclass ClickToolInput(BaseModel):\n \"\"\"Input for ClickTool.\"\"\"\n\n selector: str = Field(..., description=\"CSS selector for the element to click\")\n\n\nclass ClickTool(BaseBrowserTool):\n \"\"\"Tool for clicking on an element with the given CSS selector.\"\"\"\n\n name: str = \"click_element\"\n description: str = \"Click on an element with the given CSS selector\"\n args_schema: Type[BaseModel] = ClickToolInput\n\n visible_only: bool = True\n \"\"\"Whether to consider only visible elements.\"\"\"\n playwright_strict: bool = False\n \"\"\"Whether to employ Playwright's strict mode when clicking on elements.\"\"\"\n playwright_timeout: float = 1_000\n \"\"\"Timeout (in ms) for Playwright to wait for element to be ready.\"\"\"\n\n def _selector_effective(self, selector: str) -> str:\n if not self.visible_only:\n return selector\n return f\"{selector} >> visible=1\"\n\n def _run(\n self,\n selector: str,\n run_manager: Optional[CallbackManagerForToolRun] = None,\n ) -> str:\n \"\"\"Use the tool.\"\"\"\n if self.sync_browser is None:\n raise ValueError(f\"Synchronous browser not provided to {self.name}\")" + }, + { + "fileName": "libs/langchain/langchain/document_loaders/url_playwright.py", + "content": "\"\"\"\nimport logging\nfrom typing import List, Optional\n\nfrom langchain.docstore.document import Document\nfrom langchain.document_loaders.base import BaseLoader\n\nlogger = logging.getLogger(__name__)\n\n\nclass PlaywrightURLLoader(BaseLoader):\n \"\"\"Loader that uses Playwright and to load a page and unstructured to load the html.\n This is useful for loading pages that require javascript to render.\n\n Attributes:\n urls (List[str]): List of URLs to load.\n continue_on_failure (bool): If True, continue loading other URLs on failure.\n headless (bool): If True, the browser will run in headless mode.\n \"\"\"\n\n def __init__(\n self,\n urls: List[str],\n continue_on_failure: bool = True,\n headless: bool = True,\n remove_selectors: Optional[List[str]] = None,\n ):\n \"\"\"Load a list of URLs using Playwright and unstructured.\"\"\"\n try:\n import playwright # noqa:F401\n except ImportError:\n raise ImportError(\n \"playwright package not found, please install it with \"\n \"`pip install playwright`\"\n )\n\n try:\n import unstructured # noqa:F401\n except ImportError:\n raise ValueError(\n \"unstructured package not found, please install it with \"\n \"`pip install unstructured`\"\n )\n\n self.urls = urls\n self.continue_on_failure = continue_on_failure\n self.headless = headless\n self.remove_selectors = remove_selectors\n\n def load(self) -> List[Document]:" + }, + { + "fileName": "libs/langchain/langchain/agents/agent_toolkits/playwright/__init__.py", + "content": "\"\"\"Playwright browser toolkit.\"\"\"\nfrom langchain.agents.agent_toolkits.playwright.toolkit import PlayWrightBrowserToolkit\n\n__all__ = [\"PlayWrightBrowserToolkit\"]\n" + } + ], + "fileName": "libs/langchain/langchain/agents/agent_toolkits/playwright/toolkit.py", + "languageId": "python", + "content": "\"\"\"Playwright web browser toolkit.\"\"\"\nfrom __future__ import annotations\n\nfrom typing import TYPE_CHECKING, List, Optional, Type, cast\n\nfrom pydantic import Extra, root_validator\n\nfrom langchain.agents.agent_toolkits.base import BaseToolkit\nfrom langchain.tools.base import BaseTool\nfrom langchain.tools.playwright.base import (\n BaseBrowserTool,\n lazy_import_playwright_browsers,\n)\nfrom langchain.tools.playwright.click import ClickTool\nfrom langchain.tools.playwright.current_page import CurrentWebPageTool\nfrom langchain.tools.playwright.extract_hyperlinks import ExtractHyperlinksTool\nfrom langchain.tools.playwright.extract_text import ExtractTextTool\nfrom langchain.tools.playwright.get_elements import GetElementsTool\nfrom langchain.tools.playwright.navigate import NavigateTool\nfrom langchain.tools.playwright.navigate_back import NavigateBackTool\n\nif TYPE_CHECKING:\n from playwright.async_api import Browser as AsyncBrowser\n from playwright.sync_api import Browser as SyncBrowser\nelse:\n try:\n # We do this so pydantic can resolve the types when instantiating\n from playwright.async_api import Browser as AsyncBrowser\n from playwright.sync_api import Browser as SyncBrowser\n except ImportError:\n pass\n\n\nclass PlayWrightBrowserToolkit(BaseToolkit):\n \"\"\"Toolkit for PlayWright browser tools.\"\"\"\n\n sync_browser: Optional[\"SyncBrowser\"] = None\n async_browser: Optional[\"AsyncBrowser\"] = None\n\n class Config:\n \"\"\"Configuration for this pydantic object.\"\"\"\n\n extra = Extra.forbid\n arbitrary_types_allowed = True\n\n @root_validator\n def validate_imports_and_browser_provided(cls, values: dict) -> dict:\n \"\"\"Check that the arguments are valid.\"\"\"\n lazy_import_playwright_browsers()\n if values.get(\"async_browser\") is None and values.get(\"sync_browser\") is None:\n raise ValueError(\"Either async_browser or sync_browser must be specified.\")\n return values\n\n def get_tools(self) -> List[BaseTool]:\n \"\"\"Get the tools in the toolkit.\"\"\"\n tool_classes: List[Type[BaseBrowserTool]] = [\n ClickTool,\n NavigateTool,\n NavigateBackTool,\n ExtractTextTool,\n ExtractHyperlinksTool,\n GetElementsTool,\n CurrentWebPageTool,\n ]\n\n tools = [\n tool_cls.️🔥\n for tool_cls in tool_classes\n ]\n return cast(List[BaseTool], tools)\n\n @classmethod\n def from_browser(\n cls,\n sync_browser: Optional[SyncBrowser] = None,\n async_browser: Optional[AsyncBrowser] = None,\n ) -> PlayWrightBrowserToolkit:\n \"\"\"Instantiate the toolkit.\"\"\"\n # This is to raise a better error than the forward ref ones Pydantic would have\n lazy_import_playwright_browsers()\n return cls(sync_browser=sync_browser, async_browser=async_browser)" + } + }, + { + "completions": [ + "config: providerConfig,\n cache,\n history: new History(),\n statusBar: noopStatusBar,\n triggerMoreEagerly,", + "cache,\n providerConfig,\n history: new History(),\n statusBar: noopStatusBar,\n triggerMoreEagerly,", + "cache,\n history: new History(),\n providerConfig,\n noopStatusBar,\n triggerMoreEagerly," + ], + "elapsed": 3628, + "timestamp": "1690539528108", + "sample": { + "context": [ + { + "fileName": "src/completions/index.ts", + "content": " this.abortOpenCompletions = () => {\n previousAbort()\n stopLoading()\n }\n\n const completions = await this.requestManager.request(\n document.uri.toString(),\n logId,\n prefix,\n completers,\n contextResult.context,\n abortController.signal\n )\n\n // Shared post-processing logic\n const processedCompletions = processCompletions(completions, prefix, suffix, multiline, document.languageId)\n stopLoading()\n\n if (processedCompletions.length > 0) {\n CompletionLogger.suggest(logId)\n return toInlineCompletionItems(logId, document, position, processedCompletions)\n }\n\n CompletionLogger.noResponse(logId)\n return { items: [] }\n }\n}\n\nexport interface Completion {\n prefix: string\n content: string\n stopReason?: string\n}\n\nfunction handleCacheHit(\n cachedCompletions: CachedCompletions,\n document: vscode.TextDocument,\n position: vscode.Position,\n prefix: string,\n suffix: string,\n multiline: boolean,\n languageId: string\n): vscode.InlineCompletionList {\n const results = processCompletions(cachedCompletions.completions, prefix, suffix, multiline, languageId)\n return toInlineCompletionItems(cachedCompletions.logId, document, position, results)\n}\n\nfunction processCompletions(\n completions: Completion[],\n prefix: string," + }, + { + "fileName": "src/completions/document.ts", + "content": "import * as vscode from 'vscode'\n\n/**\n * Get the current document context based on the cursor position in the current document.\n *\n * This function is meant to provide a context around the current position in the document,\n * including a prefix, a suffix, the previous line, the previous non-empty line, and the next non-empty line.\n * The prefix and suffix are obtained by looking around the current position up to a max length\n * defined by `maxPrefixLength` and `maxSuffixLength` respectively. If the length of the entire\n * document content in either direction is smaller than these parameters, the entire content will be used.\n *w\n *\n * @param document - A `vscode.TextDocument` object, the document in which to find the context.\n * @param position - A `vscode.Position` object, the position in the document from which to find the context.\n * @param maxPrefixLength - A number representing the maximum length of the prefix to get from the document.\n * @param maxSuffixLength - A number representing the maximum length of the suffix to get from the document.\n *\n * @returns An object containing the current document context or null if there are no lines in the document.\n */\nexport function getCurrentDocContext(\n document: vscode.TextDocument,\n position: vscode.Position,\n maxPrefixLength: number,\n maxSuffixLength: number\n): {\n prefix: string\n suffix: string\n prevLine: string\n prevNonEmptyLine: string\n nextNonEmptyLine: string\n} | null {\n const offset = document.offsetAt(position)\n\n const prefixLines = document.getText(new vscode.Range(new vscode.Position(0, 0), position)).split('\\n')\n\n if (prefixLines.length === 0) {\n console.error('no lines')\n return null\n }\n\n const suffixLines = document\n .getText(new vscode.Range(position, document.positionAt(document.getText().length)))\n .split('\\n')\n\n let nextNonEmptyLine = ''\n if (suffixLines.length > 0) {\n for (const line of suffixLines) {\n if (line.trim().length > 0) {\n nextNonEmptyLine = line\n break" + }, + { + "fileName": "src/completions/docprovider.ts", + "content": "\n public addCompletions(uri: vscode.Uri, lang: string, completions: Completion[], debug?: Meta): void {\n if (!this.completionsByUri[uri.toString()]) {\n this.completionsByUri[uri.toString()] = []\n }\n\n this.completionsByUri[uri.toString()].push({\n lang,\n completions,\n meta: debug,\n })\n this.fireDocumentChanged(uri)\n }\n\n public onDidChangeEmitter = new vscode.EventEmitter()\n public onDidChange = this.onDidChangeEmitter.event\n\n public provideTextDocumentContent(uri: vscode.Uri): string {\n const completionGroups = this.completionsByUri[uri.toString()]\n if (!completionGroups) {\n return 'Loading...'\n }\n\n return completionGroups\n .map(({ completions, lang }) =>\n completions\n .map(({ content, stopReason: finishReason }, index) => {\n const completionText = `\\`\\`\\`${lang}\\n${content}\\n\\`\\`\\``\n const headerComponents = [`${index + 1} / ${completions.length}`]\n if (finishReason) {\n headerComponents.push(`finish_reason:${finishReason}`)\n }\n return headerize(headerComponents.join(', '), 80) + '\\n' + completionText\n })\n .filter(t => t)\n .join('\\n\\n')\n )\n .join('\\n\\n')\n }\n}\n\nfunction headerize(label: string, width: number): string {\n const prefix = '# ======= '\n let buffer = width - label.length - prefix.length - 1\n if (buffer < 0) {\n buffer = 0\n }\n return `${prefix}${label} ${'='.repeat(buffer)}`\n}\n" + }, + { + "fileName": "src/completions/history.ts", + "content": " if (register) {\n const disposable = register()\n if (disposable) {\n this.subscriptions.push(disposable)\n }\n }\n }\n\n public dispose(): void {\n vscode.Disposable.from(...this.subscriptions).dispose()\n }\n\n public addItem(newItem: HistoryItem): void {\n if (newItem.document.uri.scheme === 'codegen') {\n return\n }\n const foundIndex = this.history.findIndex(\n item => item.document.uri.toString() === newItem.document.uri.toString()\n )\n if (foundIndex >= 0) {\n this.history = [...this.history.slice(0, foundIndex), ...this.history.slice(foundIndex + 1)]\n }\n this.history.push(newItem)\n if (this.history.length > this.window) {\n this.history.shift()\n }\n }\n\n /**\n * Returns the last n items of history in reverse chronological order (latest item at the front)\n */\n public lastN(n: number, languageId?: string, ignoreUris?: vscode.Uri[]): HistoryItem[] {\n const ret: HistoryItem[] = []\n const ignoreSet = new Set(ignoreUris || [])\n for (let i = this.history.length - 1; i >= 0; i--) {\n const item = this.history[i]\n if (ret.length > n) {\n break\n }\n if (ignoreSet.has(item.document.uri)) {\n continue\n }\n if (languageId && languageId !== item.document.languageId) {\n continue\n }\n ret.push(item)\n }\n return ret\n }\n}" + } + ], + "fileName": "src/completions/completion.test.ts", + "languageId": "typescript", + "content": "import { beforeEach, describe, expect, it, vi } from 'vitest'\nimport type * as vscode from 'vscode'\nimport { URI } from 'vscode-uri'\n\nimport {\n CompletionParameters,\n CompletionResponse,\n} from '@sourcegraph/cody-shared/src/sourcegraph-api/completions/types'\n\nimport { vsCodeMocks } from '../testutils/mocks'\n\nimport { CodyCompletionItemProvider } from '.'\nimport { CompletionsCache } from './cache'\nimport { History } from './history'\nimport { createProviderConfig } from './providers/anthropic'\n\nvi.mock('vscode', () => ({\n ...vsCodeMocks,\n InlineCompletionTriggerKind: {\n Invoke: 0,\n Automatic: 1,\n },\n workspace: {\n ...vsCodeMocks.workspace,\n asRelativePath(path: string) {\n return path\n },\n onDidChangeTextDocument() {\n return null\n },\n },\n window: {\n ...vsCodeMocks.window,\n visibleTextEditors: [],\n tabGroups: { all: [] },\n },\n}))\n\nvi.mock('./context-embeddings.ts', () => ({\n getContextFromEmbeddings: () => [],\n}))\n\nfunction createCompletionResponse(completion: string): CompletionResponse {\n return {\n completion: truncateMultilineString(completion),\n stopReason: 'unknown',\n }\n}\n\nconst noopStatusBar = {\n startLoading: () => () => {},\n} as any\n\nconst CURSOR_MARKER = ''\n\n/**\n * A helper function used so that the below code example can be intended in code but will have their\n * prefix stripped. This is similar to what Vitest snapshots use but without the prettier hack so that\n * the starting ` is always in the same line as the function name :shrug:\n */\nfunction truncateMultilineString(string: string): string {\n const lines = string.split('\n')\n\n if (lines.length <= 1) {\n return string\n }\n\n if (lines[0] !== '') {\n return string\n }\n\n const regex = lines[1].match(/^ */)\n\n const indentation = regex ? regex[0] : ''\n return lines\n .map(line => (line.startsWith(indentation) ? line.replace(indentation, '') : line))\n .slice(1)\n .join('\n')\n}\n\ndescribe('Cody completions', () => {\n /**\n * A test helper to trigger a completion request. The code example must include\n * a pipe character to denote the current cursor position.\n *\n * @example\n * complete(`\n * async function foo() {\n * ${CURSOR_MARKER}\n * }`)\n */\n let complete: (\n code: string,\n responses?: CompletionResponse[] | 'stall',\n languageId?: string,\n context?: vscode.InlineCompletionContext,\n triggerMoreEagerly?: boolean\n ) => Promise<{\n requests: CompletionParameters[]\n completions: vscode.InlineCompletionItem[]\n }>\n beforeEach(() => {\n const cache = new CompletionsCache()\n complete = async (\n code: string,\n responses?: CompletionResponse[] | 'stall',\n languageId: string = 'typescript',\n context: vscode.InlineCompletionContext = { triggerKind: 1, selectedCompletionInfo: undefined },\n triggerMoreEagerly = true\n ): Promise<{\n requests: CompletionParameters[]\n completions: vscode.InlineCompletionItem[]\n }> => {\n code = truncateMultilineString(code)\n\n const requests: CompletionParameters[] = []\n let requestCounter = 0\n const completionsClient: any = {\n complete(params: CompletionParameters): Promise {\n requests.push(params)\n if (responses === 'stall') {\n // Creates a stalling request that never responds\n return new Promise(() => {})\n }\n return Promise.resolve(responses?.[requestCounter++] || { completion: '', stopReason: 'unknown' })\n },\n }\n const providerConfig = createProviderConfig({\n completionsClient,\n contextWindowTokens: 2048,\n })\n const completionProvider = new CodyCompletionItemProvider({\n ️🔥\n })\n\n if (!code.includes(CURSOR_MARKER)) {\n throw new Error('The test code must include a | to denote the cursor position')\n }\n\n const cursorIndex = code.indexOf(CURSOR_MARKER)\n const prefix = code.slice(0, cursorIndex)\n const suffix = code.slice(cursorIndex + CURSOR_MARKER.length)\n\n const codeWithoutCursor = prefix + suffix\n\n const token: any = {\n onCancellationRequested() {\n return null\n },\n }\n const document: any = {\n filename: 'test.ts',\n uri: URI.parse('file:///test.ts'),\n languageId," + } + } +] diff --git a/lib/shared/src/configuration.ts b/lib/shared/src/configuration.ts index cc7e90673b34..ca4c80e40f3f 100644 --- a/lib/shared/src/configuration.ts +++ b/lib/shared/src/configuration.ts @@ -16,7 +16,7 @@ export interface Configuration { experimentalCustomRecipes: boolean experimentalGuardrails: boolean experimentalNonStop: boolean - autocompleteAdvancedProvider: 'anthropic' | 'unstable-codegen' | 'unstable-huggingface' + autocompleteAdvancedProvider: 'anthropic' | 'unstable-codegen' | 'unstable-huggingface' | 'unstable-fireworks' autocompleteAdvancedServerEndpoint: string | null autocompleteAdvancedAccessToken: string | null autocompleteAdvancedCache: boolean diff --git a/vscode/package.json b/vscode/package.json index e152dcec6a8a..158ecd6caf5c 100644 --- a/vscode/package.json +++ b/vscode/package.json @@ -869,17 +869,18 @@ "enum": [ "anthropic", "unstable-codegen", - "unstable-huggingface" + "unstable-huggingface", + "unstable-fireworks" ], - "markdownDescription": "Overwrite the provider used for code autocomplete. Only supported values at the moment are `anthropic` (default), `unstable-codegen`, or `unstable-huggingface`." + "markdownDescription": "Overwrite the provider used for code autocomplete." }, "cody.autocomplete.advanced.serverEndpoint": { "type": "string", - "markdownDescription": "Overwrite the server endpoint used for code autocomplete. This is only supported with the `unstable-codegen` or `unstable-huggingface` provider." + "markdownDescription": "Overwrite the server endpoint used for code autocomplete. This is only supported with a provider other than `anthropic`." }, "cody.autocomplete.advanced.accessToken": { "type": "string", - "markdownDescription": "Overwrite the access token used for code autocomplete. This is only supported with the `unstable-huggingface` provider." + "markdownDescription": "Overwrite the access token used for code autocomplete. This is only supported with a provider other than `anthropic`." }, "cody.autocomplete.advanced.cache": { "type": "boolean", diff --git a/vscode/src/completions/providers/createProvider.ts b/vscode/src/completions/providers/createProvider.ts index 1fc543bb4a89..b00657b1e312 100644 --- a/vscode/src/completions/providers/createProvider.ts +++ b/vscode/src/completions/providers/createProvider.ts @@ -4,6 +4,7 @@ import { SourcegraphNodeCompletionsClient } from '@sourcegraph/cody-shared/src/s import { createProviderConfig as createAnthropicProviderConfig } from './anthropic' import { ProviderConfig } from './provider' import { createProviderConfig as createUnstableCodeGenProviderConfig } from './unstable-codegen' +import { createProviderConfig as createUnstableFireworksProviderConfig } from './unstable-fireworks' import { createProviderConfig as createUnstableHuggingFaceProviderConfig } from './unstable-huggingface' export function createProviderConfig( @@ -40,6 +41,20 @@ export function createProviderConfig( ) break } + case 'unstable-fireworks': { + if (config.autocompleteAdvancedServerEndpoint !== null) { + providerConfig = createUnstableFireworksProviderConfig({ + serverEndpoint: config.autocompleteAdvancedServerEndpoint, + accessToken: config.autocompleteAdvancedAccessToken, + }) + break + } + + onError( + 'Provider `unstable-fireworks` can not be used without configuring `cody.autocomplete.advanced.serverEndpoint`. Falling back to `anthropic`.' + ) + break + } } if (providerConfig) { return providerConfig diff --git a/vscode/src/completions/providers/unstable-fireworks.ts b/vscode/src/completions/providers/unstable-fireworks.ts new file mode 100644 index 000000000000..52ebf51cc876 --- /dev/null +++ b/vscode/src/completions/providers/unstable-fireworks.ts @@ -0,0 +1,151 @@ +import fetch from 'isomorphic-fetch' + +import { Completion } from '..' +import { logger } from '../../log' +import { ReferenceSnippet } from '../context' +import { getLanguageConfig } from '../language' +import { isAbortError } from '../utils' + +import { Provider, ProviderConfig, ProviderOptions } from './provider' + +interface UnstableFireworksOptions { + serverEndpoint: string + accessToken: null | string +} + +const PROVIDER_IDENTIFIER = 'fireworks' +const STOP_WORD = '<|endoftext|>' +const CONTEXT_WINDOW_CHARS = 3500 // ~ 1280 token limit + +export class UnstableFireworksProvider extends Provider { + private serverEndpoint: string + private accessToken: null | string + + constructor(options: ProviderOptions, unstableFireworksOptions: UnstableFireworksOptions) { + super(options) + this.serverEndpoint = unstableFireworksOptions.serverEndpoint + this.accessToken = unstableFireworksOptions.accessToken + } + + private createPrompt(snippets: ReferenceSnippet[]): string { + const maxPromptChars = CONTEXT_WINDOW_CHARS - CONTEXT_WINDOW_CHARS * this.options.responsePercentage + + const intro: string[] = [] + let prompt = '' + + const languageConfig = getLanguageConfig(this.options.languageId) + if (languageConfig) { + intro.push(`Path: ${this.options.fileName}`) + } + + for (let snippetsToInclude = 0; snippetsToInclude < snippets.length + 1; snippetsToInclude++) { + if (snippetsToInclude > 0) { + const snippet = snippets[snippetsToInclude - 1] + intro.push(`Here is a reference snippet of code from ${snippet.fileName}:\n\n${snippet.content}`) + } + + const introString = + intro + .join('\n\n') + .split('\n') + .map(line => (languageConfig ? languageConfig.commentStart + line : '')) + .join('\n') + '\n' + + // Prompt format is taken form https://starcoder.co/bigcode/starcoder#fill-in-the-middle + const nextPrompt = `${introString}${this.options.prefix}${this.options.suffix}` + + if (nextPrompt.length >= maxPromptChars) { + return prompt + } + + prompt = nextPrompt + } + + return prompt + } + + public async generateCompletions(abortSignal: AbortSignal, snippets: ReferenceSnippet[]): Promise { + const prompt = this.createPrompt(snippets) + + const request = { + prompt, + // To speed up sample generation in single-line case, we request a lower token limit + // since we can't terminate on the first `\n`. + max_tokens: this.options.multiline ? 256 : 30, + temperature: 0.4, + top_p: 0.95, + min_tokens: 1, + n: this.options.n, + echo: false, + model: 'fireworks-starcoder-16b-w8a16', + } + console.log(request) + + const log = logger.startCompletion({ + request, + provider: PROVIDER_IDENTIFIER, + serverEndpoint: this.serverEndpoint, + }) + + const response = await fetch(this.serverEndpoint, { + method: 'POST', + body: JSON.stringify(request), + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${this.accessToken}`, + }, + signal: abortSignal, + }) + + try { + const data = (await response.json()) as + | { choices: { text: string; finish_reason: string }[] } + | { error: string } + + if ('error' in data) { + throw new Error(data.error) + } + + const completions = data.choices.map(c => ({ + content: postProcess(c.text, this.options.multiline), + stopReason: c.finish_reason, + })) + log?.onComplete(completions.map(c => c.content)) + + return completions.map(c => ({ + prefix: this.options.prefix, + content: c.content, + stopReason: c.stopReason, + })) + } catch (error: any) { + if (!isAbortError(error)) { + log?.onError(error) + } + + throw error + } + } +} + +function postProcess(content: string, multiline: boolean): string { + content = content.replace(STOP_WORD, '') + + // The model might return multiple lines for single line completions because + // we are only able to specify a token limit. + if (!multiline && content.includes('\n')) { + content = content.slice(0, content.indexOf('\n')) + } + + return content.trim() +} + +export function createProviderConfig(unstableFireworksOptions: UnstableFireworksOptions): ProviderConfig { + return { + create(options: ProviderOptions) { + return new UnstableFireworksProvider(options, unstableFireworksOptions) + }, + maximumContextCharacters: CONTEXT_WINDOW_CHARS, + enableExtendedMultilineTriggers: true, + identifier: PROVIDER_IDENTIFIER, + } +} diff --git a/vscode/src/configuration.ts b/vscode/src/configuration.ts index 788c2f6ff264..5b2e5893e729 100644 --- a/vscode/src/configuration.ts +++ b/vscode/src/configuration.ts @@ -36,14 +36,14 @@ export function getConfiguration(config: ConfigGetter): Configuration { debugRegex = new RegExp('.*') } - let autocompleteAdvancedProvider = config.get<'anthropic' | 'unstable-codegen' | 'unstable-huggingface'>( - CONFIG_KEY.autocompleteAdvancedProvider, - 'anthropic' - ) + let autocompleteAdvancedProvider = config.get< + 'anthropic' | 'unstable-codegen' | 'unstable-huggingface' | 'unstable-fireworks' + >(CONFIG_KEY.autocompleteAdvancedProvider, 'anthropic') if ( autocompleteAdvancedProvider !== 'anthropic' && autocompleteAdvancedProvider !== 'unstable-codegen' && - autocompleteAdvancedProvider !== 'unstable-huggingface' + autocompleteAdvancedProvider !== 'unstable-huggingface' && + autocompleteAdvancedProvider !== 'unstable-fireworks' ) { autocompleteAdvancedProvider = 'anthropic' void vscode.window.showInformationMessage(