Skip to content

Commit

Permalink
support @-mentioning URLs to use web page contents as context (#3436)
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs authored Mar 19, 2024
1 parent e1d62a6 commit 71c7080
Show file tree
Hide file tree
Showing 11 changed files with 229 additions and 18 deletions.
3 changes: 2 additions & 1 deletion lib/shared/src/chat/transcript/display-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type * as vscode from 'vscode'
import type { URI } from 'vscode-uri'
import type { ContextItem } from '../../codebase-context/messages'
import type { RangeData } from '../../common/range'
import { isURLContextItem } from '../../mentions/urlContextItems'

/**
* VS Code intentionally limits what `command:vscode.open?ARGS` can have for args (see
Expand Down Expand Up @@ -40,7 +41,7 @@ export function webviewOpenURIForContextItem(item: Pick<ContextItem, 'uri' | 'ra
href: string
target: '_blank' | undefined
} {
if (item.uri.scheme === 'http' || item.uri.scheme === 'https') {
if (isURLContextItem(item)) {
return {
href: item.uri.toString(),
target: '_blank',
Expand Down
3 changes: 3 additions & 0 deletions lib/shared/src/experimentation/FeatureFlagProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ export enum FeatureFlag {

// Show document hints above a symbol if the users' cursor is there. "Opt+D to Document"
CodyDocumentHints = 'cody-document-hints',

/** Support @-mentioning URLs in chat to add context from web pages. */
URLContext = 'cody-url-context',
}

const ONE_HOUR = 60 * 60 * 1000
Expand Down
5 changes: 5 additions & 0 deletions lib/shared/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,8 @@ export {
type MentionQuery,
scanForMentionTriggerInUserTextInput,
} from './mentions/query'
export {
getURLContextItems,
isURLContextItem,
fetchContentForURLContextItem,
} from './mentions/urlContextItems'
23 changes: 20 additions & 3 deletions lib/shared/src/mentions/query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,17 @@ describe('parseMentionQuery', () => {
text: '@baz',
})
})

test('url query with http:// prefix', () => {
expect(parseMentionQuery('http://example.com/p')).toEqual<MentionQuery>({
type: 'url',
text: 'http://example.com/p',
})
expect(parseMentionQuery('https://example.com/p')).toEqual<MentionQuery>({
type: 'url',
text: 'https://example.com/p',
})
})
})

describe('scanForMentionTriggerInUserTextInput', () => {
Expand All @@ -57,6 +68,15 @@ describe('scanForMentionTriggerInUserTextInput', () => {
replaceableString: '@#abc',
}))

test('@-mention URL', () =>
expect(
scanForMentionTriggerInUserTextInput('Hello @https://example.com/p')
).toEqual<MentionTrigger | null>({
leadOffset: 6,
matchingString: 'https://example.com/p',
replaceableString: '@https://example.com/p',
}))

describe('special chars', () => {
test('dotfile', () =>
expect(scanForMentionTriggerInUserTextInput('Hello @.abc')).toEqual<MentionTrigger | null>({
Expand Down Expand Up @@ -90,9 +110,6 @@ describe('scanForMentionTriggerInUserTextInput', () => {
})

test('with range', () => {
expect(scanForMentionTriggerInUserTextInput('a @b/c:')).toBeNull()
expect(scanForMentionTriggerInUserTextInput('a @b/c:1')).toBeNull()
expect(scanForMentionTriggerInUserTextInput('a @b/c:12-')).toBeNull()
expect(scanForMentionTriggerInUserTextInput('a @b/c:12-34')).toEqual<MentionTrigger>({
leadOffset: 2,
matchingString: 'b/c:12-34',
Expand Down
7 changes: 5 additions & 2 deletions lib/shared/src/mentions/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export interface MentionQuery {
/**
* The type of context item to search for.
*/
type: 'file' | 'symbol' | 'empty'
type: 'file' | 'symbol' | 'url' | 'empty'

/**
* The user's text input, to be interpreted as a fuzzy-matched query. It is stripped of any
Expand All @@ -31,10 +31,13 @@ export function parseMentionQuery(query: string): MentionQuery {
if (query.startsWith('#')) {
return { type: 'symbol', text: query.slice(1) }
}
if (query.startsWith('http://') || query.startsWith('https://')) {
return { type: 'url', text: query }
}
return { type: 'file', text: query }
}

const PUNCTUATION = ',\\+\\*\\?\\$\\@\\|#{}\\(\\)\\^\\[\\]!%\'"~=<>:;'
const PUNCTUATION = ',\\+\\*\\$\\@\\|#{}\\(\\)\\^\\[\\]!\'"<>;'

const TRIGGERS = '@'

Expand Down
93 changes: 93 additions & 0 deletions lib/shared/src/mentions/urlContextItems.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { URI } from 'vscode-uri'
import { type ContextItem, ContextItemSource } from '../codebase-context/messages'

/**
 * Turn URL text that the user is (possibly still) typing into context items by fetching the page
 * and using its text content.
 *
 * @param urlInput The raw, possibly incomplete URL text from the user's input.
 * @param signal Optional abort signal that is forwarded to the page fetch.
 * @returns A single-element array holding a `file` context item for the page, or an empty array
 * when the input does not parse as a usable URL, the fetch yields no content, or anything throws.
 */
export async function getURLContextItems(
    urlInput: string,
    signal?: AbortSignal
): Promise<ContextItem[]> {
    const parsedURL = tryParsePossiblyIncompleteURL(urlInput)
    if (parsedURL === null) {
        return []
    }

    try {
        const pageContent = await fetchContentForURLContextItem(parsedURL.toString(), signal)
        return pageContent === null
            ? []
            : [
                  {
                      type: 'file',
                      uri: parsedURL,
                      content: pageContent,
                      title: tryGetHTMLDocumentTitle(pageContent),
                      source: ContextItemSource.User,
                  },
              ]
    } catch {
        // Deliberately swallowed: while the user is still typing, the URL is often not valid or
        // not reachable yet, and that should simply produce no suggestions.
        return []
    }
}

/**
 * Report whether a context item refers to a web page, i.e. its URI scheme is `http` or `https`.
 */
export function isURLContextItem(item: Pick<ContextItem, 'uri'>): boolean {
    const { scheme } = item.uri
    switch (scheme) {
        case 'http':
        case 'https':
            return true
        default:
            return false
    }
}

/**
 * Fetch a web page and reduce it to a smaller text body suitable for use as chat context.
 *
 * @param url The URL to fetch.
 * @param signal Optional abort signal passed through to `fetch`.
 * @returns The page body with scripts, styles, SVGs, comments, and `class`/`style`/`data-*`
 * attributes removed and truncated to a fixed maximum length, or `null` if the response status is
 * not OK.
 * @throws Whatever `fetch` or reading the response body throws (network errors, aborts).
 */
export async function fetchContentForURLContextItem(
    url: string,
    signal?: AbortSignal
): Promise<string | null> {
    const resp = await fetch(url, { signal })
    if (!resp.ok) {
        return null
    }
    const body = await resp.text()

    // HACK(sqs): Rudimentarily strip HTML tags, script, and other unneeded elements from body using
    // regexp. This is NOT intending to be a general-purpose HTML parser and is NOT sanitizing the
    // value for security.
    const bodyWithoutTags = body
        .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
        .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
        .replace(/<svg\b[^<]*(?:(?!<\/svg>)<[^<]*)*<\/svg>/gi, '')
        .replace(/<!--.*?-->/gs, '')
        // Only strip attributes inside opening tags. Running the attribute patterns over the whole
        // document would also delete visible prose such as " data-driven". Quoted values are
        // matched with their own quote character so that `class="it's"` is removed in full.
        .replace(/<[a-z][\w:-]*(?:"[^"]*"|'[^']*'|[^<>"'])*>/gi, tag =>
            tag
                .replace(/\s(?:class|style)=(?:"[^"]*"|'[^']*')/gi, '')
                .replace(/\sdata-[\w-]+(?:=(?:"[^"]*"|'[^']*'))?/gi, '')
        )

    // TODO(sqs): Arbitrarily trim the response text to avoid overflowing the context window for the
    // LLM. Ideally we would make the prompt builder prioritize this context item over other context
    // because it is explicitly from the user.
    const MAX_LENGTH = 14000
    return bodyWithoutTags.length > MAX_LENGTH
        ? `${bodyWithoutTags.slice(0, MAX_LENGTH)}... (web page content was truncated)`
        : bodyWithoutTags
}

/**
 * Parse URL text that the user may still be in the middle of typing.
 *
 * The input is accepted only when its scheme is `http` or `https` and its authority ends in
 * `localhost` or in a dot followed by at least two word characters, optionally followed by a
 * `:port` suffix.
 *
 * @returns The parsed URI, or `null` when the input is rejected or parsing throws.
 */
function tryParsePossiblyIncompleteURL(urlInput: string): URI | null {
    try {
        const candidate = URI.parse(urlInput)
        if (candidate.scheme !== 'http' && candidate.scheme !== 'https') {
            return null
        }
        const authorityLooksComplete = /(localhost|\.\w{2,})(:\d+)?$/.test(candidate.authority)
        if (!authorityLooksComplete) {
            return null
        }
        return candidate
    } catch {
        return null
    }
}

/**
 * Try to get the title of an HTML document, using incomplete regexp parsing for simplicity (because
 * this feature is experimental and we don't need robustness yet).
 *
 * The `<title>` tag is matched case-insensitively and may carry attributes. Whitespace in the
 * title is collapsed and trimmed so that titles spanning several lines display cleanly.
 *
 * @returns The title text, or `undefined` if no non-empty title is found.
 */
function tryGetHTMLDocumentTitle(html: string): string | undefined {
    const rawTitle = html.match(/<title\b[^>]*>(?<title>[^<]+)<\/title\s*>/i)?.groups?.title
    const title = rawTitle?.replace(/\s+/g, ' ').trim()
    // Treat a whitespace-only title as missing so that callers can fall back to another label.
    return title ? title : undefined
}
1 change: 1 addition & 0 deletions vscode/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This is a log of all notable changes to Cody for VS Code. [Unreleased] changes a
- Chat: Add Claude 3 Haiku for Pro users. [pull/3423](https://github.com/sourcegraph/cody/pull/3423)
- Autocomplete: Add StarCoder2 experimental support. [pull/61207](https://github.com/sourcegraph/cody/pull/61207)
- Autocomplete: Add `cody.autocomplete.experimental.fireworksOptions` for local debugging with Fireworks. [pull/3415](https://github.com/sourcegraph/cody/pull/3415)
- Chat: Added experimental support for including web pages as context by @-mentioning a URL (when the undocumented `cody.experimental.urlContext` VS Code setting is enabled). [pull/3436](https://github.com/sourcegraph/cody/pull/3436)

### Fixed

Expand Down
34 changes: 32 additions & 2 deletions vscode/src/chat/context/chatContext.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import { type ContextItem, type MentionQuery, parseMentionQuery } from '@sourcegraph/cody-shared'
import type * as vscode from 'vscode'
import {
type ContextItem,
FeatureFlag,
type MentionQuery,
featureFlagProvider,
getURLContextItems,
parseMentionQuery,
} from '@sourcegraph/cody-shared'
import * as vscode from 'vscode'
import {
getFileContextFiles,
getOpenTabsContextFile,
Expand Down Expand Up @@ -34,7 +41,30 @@ export async function getChatContextItemsForMention(
return getSymbolContextFiles(mentionQuery.text, MAX_RESULTS)
case 'file':
return getFileContextFiles(mentionQuery.text, MAX_RESULTS, cancellationToken)
case 'url':
return (await isURLContextFeatureFlagEnabled())
? getURLContextItems(
mentionQuery.text,
convertCancellationTokenToAbortSignal(cancellationToken)
)
: []
default:
return []
}
}

/**
 * Report whether @-mentioning URLs is enabled.
 *
 * The `cody.experimental.urlContext` VS Code setting is checked first; only when it is not exactly
 * `true` is the `FeatureFlag.URLContext` feature flag evaluated.
 */
export async function isURLContextFeatureFlagEnabled(): Promise<boolean> {
    const settingValue = vscode.workspace
        .getConfiguration('cody')
        .get<boolean>('experimental.urlContext')
    if (settingValue === true) {
        return true
    }
    return await featureFlagProvider.evaluateFeatureFlag(FeatureFlag.URLContext)
}

/**
 * Adapt a VS Code cancellation token to an `AbortSignal` for APIs such as `fetch`.
 *
 * @param token The cancellation token to observe.
 * @returns A signal that is already aborted if the token was cancelled before this call, and that
 * otherwise aborts when the token's cancellation event fires.
 */
function convertCancellationTokenToAbortSignal(token: vscode.CancellationToken): AbortSignal {
    const controller = new AbortController()

    // Reflect a cancellation that happened before we were called, instead of relying on the
    // cancellation event being delivered after the fact.
    if (token.isCancellationRequested) {
        controller.abort()
        return controller.signal
    }

    const disposable = token.onCancellationRequested(() => {
        controller.abort()
        // The event is only needed once; release the listener after it has fired.
        disposable.dispose()
    })
    return controller.signal
}
18 changes: 14 additions & 4 deletions vscode/src/editor/utils/editor-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ import {
MAX_CURRENT_FILE_TOKENS,
type SymbolKind,
displayPath,
fetchContentForURLContextItem,
isCodyIgnoredFile,
isDefined,
isURLContextItem,
isWindows,
} from '@sourcegraph/cody-shared'

Expand All @@ -25,6 +27,7 @@ import {
} from '@sourcegraph/cody-shared/src/codebase-context/messages'
import { CHARS_PER_TOKEN } from '@sourcegraph/cody-shared/src/prompt/constants'
import { getOpenTabsUris } from '.'
import { isURLContextFeatureFlagEnabled } from '../../chat/context/chatContext'
import { toVSCodeRange } from '../../common/range'
import { findWorkspaceFiles } from './findWorkspaceFiles'

Expand Down Expand Up @@ -275,10 +278,17 @@ export async function fillInContextItemContent(
let content = item.content
if (!item.content) {
try {
content = await editor.getTextEditorContentForFile(
item.uri,
toVSCodeRange(item.range)
)
if (isURLContextItem(item)) {
if (await isURLContextFeatureFlagEnabled()) {
content =
(await fetchContentForURLContextItem(item.uri.toString())) ?? ''
}
} else {
content = await editor.getTextEditorContentForFile(
item.uri,
toVSCodeRange(item.range)
)
}
} catch (error) {
void vscode.window.showErrorMessage(
`Cody could not include context from ${item.uri}. (Reason: ${error})`
Expand Down
58 changes: 53 additions & 5 deletions vscode/test/e2e/chat-atFile.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import * as http from 'http'
import { expect } from '@playwright/test'

import { isWindows } from '@sourcegraph/cody-shared'

import type { AddressInfo } from 'net'
import { sidebarExplorer, sidebarSignin } from './common'
import { type ExpectedEvents, test, withPlatformSlashes } from './helpers'
import { type ExpectedEvents, type ExtraWorkspaceSettings, test, withPlatformSlashes } from './helpers'

// See chat-atFile.test.md for the expected behavior for this feature.
//
Expand Down Expand Up @@ -173,8 +175,8 @@ test.extend<ExpectedEvents>({
await expect(noMatches).toBeVisible()
await chatInput.press('ArrowRight')
await expect(noMatches).toBeVisible()
await chatInput.press('?')
await expect(chatInput).toHaveText('Explain the @Main.java ! @abcdefg?file')
await chatInput.press('$')
await expect(chatInput).toHaveText('Explain the @Main.java ! @abcdefg$file')
await expect(noMatches).not.toBeVisible()
// Selection close on submit
await chatInput.press('Enter')
Expand All @@ -186,8 +188,8 @@ test.extend<ExpectedEvents>({
await chatInput.focus()
await chatInput.fill('@unknown')
await expect(noMatches).toBeVisible()
await chatInput.press('?')
await expect(chatInput).toHaveText('@unknown?')
await chatInput.press('$')
await expect(chatInput).toHaveText('@unknown$')
await expect(noMatches).not.toBeVisible()
await chatInput.press('Backspace')
await expect(noMatches).toBeVisible()
Expand Down Expand Up @@ -347,3 +349,49 @@ test.extend<ExpectedEvents>({
await previewTab.hover()
await expect(previewTab).toBeVisible()
})

// End-to-end test: with the `cody.experimental.urlContext` setting enabled, @-mentioning a URL in
// the chat input offers the page as a context item, and the submitted message shows it as a
// clickable link.
test.extend<ExtraWorkspaceSettings>({
    // biome-ignore lint/correctness/noEmptyPattern: Playwright needs empty pattern to specify "no dependencies".
    extraWorkspaceSettings: async ({}, use) => {
        // Await `use` so the fixture stays alive until the test has finished with the value.
        await use({ 'cody.experimental.urlContext': true })
    },
})('@-mention URL', async ({ page, sidebar }) => {
    // Start an HTTP server to serve up the web page that we will @-mention.
    const server = http.createServer((req, res) => {
        res.writeHead(200, { 'Content-Type': 'text/html' })
        res.end(`<h1>Hello from URL ${req.url}</h1>`)
    })
    // Listen on port 0 so that the OS picks a free port, then build the base URL from it.
    const serverURL = await new Promise<URL>(resolve => {
        server.listen(0, () => {
            const addr = server.address() as AddressInfo
            resolve(new URL(`http://localhost:${addr.port}`))
        })
    })

    try {
        await sidebarSignin(page, sidebar)

        // Open chat.
        await page.getByRole('button', { name: 'New Chat', exact: true }).click()
        const chatPanelFrame = page.frameLocator('iframe.webview').last().frameLocator('iframe')
        const chatInput = chatPanelFrame.getByRole('textbox', { name: 'Chat message' })

        // Type @-mention of the URL.
        const mentionURL = new URL('/foo', serverURL)
        await chatInput.fill(`@${mentionURL}`)
        const optionTitle = `foo ${serverURL}`
        await expect(chatPanelFrame.getByRole('option', { name: optionTitle })).toBeVisible()
        await chatPanelFrame.getByRole('option', { name: optionTitle }).click()
        await expect(chatInput).toHaveText(`@${mentionURL} `)

        // Submit the message
        await chatInput.press('Enter')

        // URL context item shows up and is clickable.
        await chatPanelFrame.getByText('✨ Context: 1 file').click()
        const chatContext = chatPanelFrame.locator('details').last()
        await chatContext.getByRole('link', { name: `@${mentionURL}` }).click()
    } finally {
        // Always stop the local server, even when an assertion above fails.
        server.close()
    }
})
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ const Item: FunctionComponent<{
const item = option.item
const icon =
item.type === 'file' ? null : item.kind === 'class' ? 'symbol-structure' : 'symbol-method'
const title = item.type === 'file' ? displayPathBasename(item.uri) : item.symbolName
const title = item.title ?? (item.type === 'file' ? displayPathBasename(item.uri) : item.symbolName)
const range = item.range ? displayLineRange(item.range) : ''
const dirname = displayPathDirname(item.uri)
const description =
Expand Down

0 comments on commit 71c7080

Please sign in to comment.