Skip to content

Commit

Permalink
fix context logging payload and add identifier field in context snipp…
Browse files Browse the repository at this point in the history
…ets (#5507)

## Context
1. This PR changes the `contextCandidates` logged in
`inlineCompletionItemContext` to use the `rankedContextCandidates`. Without
this, the offline dataset is not very useful: the candidates are
limited to the items sent in the final prompt, so we are not able to
evaluate the effect of the other context items.
2. Adds an `identifier` field to the `AutocompleteContextItem` to help
identify which retriever each item comes from.

## Test plan
1. Added test cases to check payload limits before logging
2. CI Checks
  • Loading branch information
hitesh-1997 committed Sep 10, 2024
1 parent c9e483d commit 03d6ddc
Show file tree
Hide file tree
Showing 16 changed files with 191 additions and 26 deletions.
1 change: 1 addition & 0 deletions lib/shared/src/completions/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type * as vscode from 'vscode'
import type { URI } from 'vscode-uri'

export interface AutocompleteFileContextSnippet {
identifier: string
uri: URI
startLine: number
endLine: number
Expand Down
24 changes: 24 additions & 0 deletions vscode/src/completions/context/context-mixer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,14 @@ describe('ContextMixer', () => {
createMockStrategy([
[
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo() {}',
startLine: 0,
endLine: 0,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('bar.ts'),
content: 'function bar() {}',
startLine: 0,
Expand All @@ -107,12 +109,14 @@ describe('ContextMixer', () => {
{
fileName: 'foo.ts',
content: 'function foo() {}',
identifier: 'jaccard-similarity',
startLine: 0,
endLine: 0,
},
{
fileName: 'bar.ts',
content: 'function bar() {}',
identifier: 'jaccard-similarity',
startLine: 0,
endLine: 0,
},
Expand Down Expand Up @@ -142,12 +146,14 @@ describe('ContextMixer', () => {
createMockStrategy([
[
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo1() {}',
startLine: 0,
endLine: 0,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('bar.ts'),
content: 'function bar1() {}',
startLine: 0,
Expand All @@ -157,18 +163,21 @@ describe('ContextMixer', () => {

[
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo3() {}',
startLine: 10,
endLine: 10,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo1() {}\nfunction foo2() {}',
startLine: 0,
endLine: 1,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('bar.ts'),
content: 'function bar1() {}\nfunction bar2() {}',
startLine: 0,
Expand All @@ -188,32 +197,37 @@ describe('ContextMixer', () => {
"content": "function foo1() {}",
"endLine": 0,
"fileName": "foo.ts",
"identifier": "jaccard-similarity",
"startLine": 0,
},
{
"content": "function foo1() {}
function foo2() {}",
"endLine": 1,
"fileName": "foo.ts",
"identifier": "jaccard-similarity",
"startLine": 0,
},
{
"content": "function bar1() {}",
"endLine": 0,
"fileName": "bar.ts",
"identifier": "jaccard-similarity",
"startLine": 0,
},
{
"content": "function bar1() {}
function bar2() {}",
"endLine": 1,
"fileName": "bar.ts",
"identifier": "jaccard-similarity",
"startLine": 0,
},
{
"content": "function foo3() {}",
"endLine": 10,
"fileName": "foo.ts",
"identifier": "jaccard-similarity",
"startLine": 10,
},
]
Expand Down Expand Up @@ -260,12 +274,14 @@ describe('ContextMixer', () => {
createMockStrategy([
[
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo1() {}',
startLine: 0,
endLine: 0,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo/bar.ts'),
content: 'function bar1() {}',
startLine: 0,
Expand All @@ -274,18 +290,21 @@ describe('ContextMixer', () => {
],
[
{
identifier: 'jaccard-similarity',
uri: testFileUri('test/foo.ts'),
content: 'function foo3() {}',
startLine: 10,
endLine: 10,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo1() {}\nfunction foo2() {}',
startLine: 0,
endLine: 1,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('example/bar.ts'),
content: 'function bar1() {}\nfunction bar2() {}',
startLine: 0,
Expand Down Expand Up @@ -322,12 +341,14 @@ describe('ContextMixer', () => {
createMockStrategy([
[
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo1() {}',
startLine: 0,
endLine: 0,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo/bar.ts'),
content: 'function bar1() {}',
startLine: 0,
Expand All @@ -336,18 +357,21 @@ describe('ContextMixer', () => {
],
[
{
identifier: 'jaccard-similarity',
uri: testFileUri('test/foo.ts'),
content: 'function foo3() {}',
startLine: 10,
endLine: 10,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('foo.ts'),
content: 'function foo1() {}\nfunction foo2() {}',
startLine: 0,
endLine: 1,
},
{
identifier: 'jaccard-similarity',
uri: testFileUri('example/bar.ts'),
content: 'function bar1() {}\nfunction bar2() {}',
startLine: 0,
Expand Down
3 changes: 3 additions & 0 deletions vscode/src/completions/context/context-mixer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export interface ContextSummary {
/**
 * Shape of the object returned when context is gathered for a completion
 * (returned from within the `ContextMixer` class in this file).
 */
export interface GetContextResult {
// The mixed context snippets selected for the request.
context: AutocompleteContextSnippet[]
// Summary of the retrieval (includes `duration` and `retrieverStats`).
logSummary: ContextSummary
// The fused retriever results, materialized as an array.
// NOTE(review): presumably a superset of `context` (not limited to the items
// that make it into the final prompt) and intended for logging — confirm.
rankedContextCandidates: AutocompleteContextSnippet[]
}

/**
Expand Down Expand Up @@ -90,6 +91,7 @@ export class ContextMixer implements vscode.Disposable {
duration: 0,
retrieverStats: {},
},
rankedContextCandidates: [],
}
}

Expand Down Expand Up @@ -172,6 +174,7 @@ export class ContextMixer implements vscode.Disposable {
return {
context: mixedContext,
logSummary,
rankedContextCandidates: Array.from(fusedResults),
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ import { captureException } from '../../../../services/sentry/sentry'
import type { ContextRetriever, ContextRetrieverOptions } from '../../../types'

import type { AutocompleteContextSnippet } from '@sourcegraph/cody-shared'
import { RetrieverIdentifier } from '../../utils'
import {
getLastNGraphContextIdentifiersFromDocument,
getLastNGraphContextIdentifiersFromString,
} from '../graph/identifiers'
import { type SimpleRepository, inferGitRepository } from './simple-git'

export class BfgRetriever implements ContextRetriever {
public identifier = 'bfg'
public identifier = RetrieverIdentifier.BfgRetriever
private loadedBFG: Promise<MessageHandler>
private bfgIndexingPromise = Promise.resolve<void>(undefined)
private awaitIndexing: boolean
Expand Down Expand Up @@ -235,6 +236,7 @@ export class BfgRetriever implements ContextRetriever {
// Convert BFG snippets to match the format expected on the client.
const symbols = (response.symbols || []).map(contextSnippet => ({
...contextSnippet,
identifier: RetrieverIdentifier.BfgRetriever,
uri: vscode.Uri.from({ scheme: 'file', path: contextSnippet.fileName }),
})) satisfies Omit<AutocompleteContextSnippet, 'startLine' | 'endLine'>[]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,14 @@ describe('bestJaccardMatch', () => {
score: 1,
content: 'foo\nbar\nbaz',
endLine: 2,
identifier: 'jaccard-similarity',
startLine: 0,
})
expect(bestJaccardMatches('bar\nquux', matchText, 4, MAX_MATCHES)[0]).toEqual({
score: 0.5,
content: 'bar\nbaz\nqux\nquux',
endLine: 4,
identifier: 'jaccard-similarity',
startLine: 1,
})
expect(
Expand All @@ -146,6 +148,7 @@ describe('bestJaccardMatch', () => {
score: 0.3,
startLine: 4,
endLine: 9,
identifier: 'jaccard-similarity',
content: ['quux', 'quuz', 'corge', 'grault', 'garply', 'waldo'].join('\n'),
})
})
Expand Down Expand Up @@ -206,6 +209,7 @@ describe('bestJaccardMatch', () => {
'foo',
'bar',",
"endLine": 4,
"identifier": "jaccard-similarity",
"score": 0.14285714285714285,
"startLine": 0,
}
Expand All @@ -216,6 +220,7 @@ describe('bestJaccardMatch', () => {
expect(bestJaccardMatches('foo', 'foo', 10, MAX_MATCHES)[0]).toEqual({
content: 'foo',
endLine: 0,
identifier: 'jaccard-similarity',
score: 1,
startLine: 0,
})
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { LRUCache } from 'lru-cache'
import winkUtils from 'wink-nlp-utils'
import { RetrieverIdentifier } from '../../utils'

const MAX_STEM_CACHE_SIZE = 30000

export interface JaccardMatch {
identifier: string
score: number
content: string
startLine: number
Expand Down Expand Up @@ -67,6 +69,7 @@ export function bestJaccardMatches(
// Initialize the result set with the first window
const windows: JaccardMatch[] = [
{
identifier: RetrieverIdentifier.JaccardSimilarityRetriever,
score: jaccardSimilarity(targetWordCounts, windowWordCounts, intersectionWordCounts),
content: lines.slice(firstWindowStart, firstWindowEnd + 1).join('\n'),
startLine: firstWindowStart,
Expand Down Expand Up @@ -118,6 +121,7 @@ export function bestJaccardMatches(
const startLine = i
const endLine = i + windowSize - 1
windows.push({
identifier: RetrieverIdentifier.JaccardSimilarityRetriever,
score,
content: lines.slice(startLine, endLine + 1).join('\n'),
startLine,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { type DocumentHistory, VSCodeDocumentHistory } from './history'
import { FeatureFlag, isDefined } from '@sourcegraph/cody-shared'
import { completionProviderConfig } from '../../../completion-provider-config'
import { lastNLines } from '../../../text-processing'
import { type ShouldUseContextParams, shouldBeUsedAsContext } from '../../utils'
import { RetrieverIdentifier, type ShouldUseContextParams, shouldBeUsedAsContext } from '../../utils'
import { type CachedRerieverOptions, CachedRetriever } from '../cached-retriever'
import { type JaccardMatch, bestJaccardMatches } from './bestJaccardMatch'

Expand Down Expand Up @@ -47,7 +47,7 @@ export class JaccardSimilarityRetriever extends CachedRetriever implements Conte
this.maxMatchesPerFile = options.maxMatchesPerFile ?? MAX_MATCHES_PER_FILE
}

public identifier = 'jaccard-similarity'
public identifier = RetrieverIdentifier.JaccardSimilarityRetriever
private history = new VSCodeDocumentHistory()

public async doRetrieval({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
} from '../../../../graph/lsp/symbol-context-snippets'
import { SupportedLanguage } from '../../../../tree-sitter/grammars'
import type { ContextRetriever, ContextRetrieverOptions } from '../../../types'
import { RetrieverIdentifier } from '../../utils'
import { getLastNGraphContextIdentifiersFromDocument } from '../graph/identifiers'

const SUPPORTED_LANGUAGES = new Set([
Expand All @@ -26,7 +27,7 @@ const RECURSION_LIMIT = 3
const IDENTIFIERS_TO_RESOLVE = 1

export class LspLightRetriever implements ContextRetriever {
public identifier = 'lsp-light'
public identifier = RetrieverIdentifier.LspLightRetriever
private disposables: vscode.Disposable[] = []
private isCacheDisabled = IS_LSP_LIGHT_CACHE_DISABLED

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { AutocompleteContextSnippet } from '@sourcegraph/cody-shared'
import * as vscode from 'vscode'
import { getLanguageConfig } from '../../../../tree-sitter/language'
import type { ContextRetriever, ContextRetrieverOptions } from '../../../types'
import { type ShouldUseContextParams, shouldBeUsedAsContext } from '../../utils'
import { RetrieverIdentifier, type ShouldUseContextParams, shouldBeUsedAsContext } from '../../utils'

interface TrackedDocument {
content: string
Expand All @@ -24,7 +24,7 @@ export class RecentEditsRetriever implements vscode.Disposable, ContextRetriever
// We use a map from the document URI to the set of tracked completions inside that document to
// improve performance of the `onDidChangeTextDocument` event handler.
private trackedDocuments: Map<string, TrackedDocument> = new Map()
public identifier = 'recent-edits'
public identifier = RetrieverIdentifier.RecentEditsRetriever
private disposables: vscode.Disposable[] = []

constructor(
Expand Down Expand Up @@ -54,6 +54,7 @@ export class RecentEditsRetriever implements vscode.Disposable, ContextRetriever
).toString()
const autocompleteSnippet = {
uri: diff.uri,
identifier: RetrieverIdentifier.RecentEditsRetriever,
content,
} satisfies Omit<AutocompleteContextSnippet, 'startLine' | 'endLine'>
autocompleteContextSnippets.push(autocompleteSnippet)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import type {
ProtocolRelatedInformationDiagnostic,
} from '../../../../jsonrpc/agent-protocol'
import type { ContextRetriever, ContextRetrieverOptions } from '../../../types'
import { RetrieverIdentifier } from '../../utils'
import { SymbolFormatter, isStdLibNode } from './SymbolFormatter'
import { getTSSymbolAtLocation } from './getTSSymbolAtLocation'
import { type NodeMatchKind, relevantTypeIdentifiers } from './relevantTypeIdentifiers'
Expand Down Expand Up @@ -112,7 +113,7 @@ interface DocumentSnapshot {
* information about the autocomplete request location.
*/
export class TscRetriever implements ContextRetriever {
public identifier = 'tsc'
public identifier = RetrieverIdentifier.TscRetriever

constructor(private options: TscRetrieverOptions = defaultTscRetrieverOptions()) {
this.disposables.push(
Expand Down Expand Up @@ -535,6 +536,7 @@ class SymbolCollector {
// Skip module declarations because they can be too large.
// We still format them to queue the referenced types.
const snippet: AutocompleteContextSnippet = {
identifier: RetrieverIdentifier.TscRetriever,
symbol: sym.name,
content,
startLine: start.line,
Expand Down
Loading

0 comments on commit 03d6ddc

Please sign in to comment.