From 597a22f2197fb3999f362b63c87f840c39f447a1 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Thu, 7 Dec 2023 19:44:12 +0100 Subject: [PATCH 01/13] [Obs AI Assistant] Abort controller when component unmounts --- .../public/hooks/use_chat.test.ts | 19 +++++++++++++++++++ .../public/hooks/use_chat.ts | 3 +-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.test.ts b/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.test.ts index a2ef68899877e..a442a3c91af54 100644 --- a/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.test.ts +++ b/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.test.ts @@ -233,6 +233,25 @@ describe('useChat', () => { }); }); + describe('after unmounting the component', () => { + beforeEach(() => { + act(() => { + subject.next({ + type: StreamingChatResponseEventType.ChatCompletionChunk, + id: 'my-message-id', + message: { + content: 'good', + }, + }); + hookResult.unmount(); + }); + }); + + it('shows the partial message and sets chatState to aborted', () => { + expect(mockChatService.complete.mock.lastCall?.[0].signal.aborted).toBe(true); + }); + }); + describe('after a response errors out', () => { beforeEach(() => { act(() => { diff --git a/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.ts b/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.ts index 989b3fdcb23a8..92dab013aa067 100644 --- a/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.ts +++ b/x-pack/plugins/observability_ai_assistant/public/hooks/use_chat.ts @@ -225,9 +225,8 @@ export function useChat({ ); useEffect(() => { - const controller = abortControllerRef.current; return () => { - controller.abort(); + abortControllerRef.current.abort(); }; }, []); From 90aad9b2969d8f34c2601c974cdaa8c7ac66531c Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Sun, 10 Dec 2023 10:03:37 +0100 Subject: [PATCH 02/13] [Obs AI 
Assistant] Evaluation framework --- package.json | 4 +- .../scripts/evaluation/cli.ts | 67 ++++ .../scripts/evaluation/get_service_urls.ts | 152 ++++++++ .../scripts/evaluation/index.js | 10 + .../scripts/evaluation/index.ts | 195 +++++++++++ .../scripts/evaluation/kibana_client.ts | 324 ++++++++++++++++++ .../scripts/evaluation/read_kibana_config.ts | 44 +++ .../scenarios/elasticsearch/index.ts | 21 ++ .../evaluation/scenarios/esql/index.ts | 172 ++++++++++ .../scripts/evaluation/types.ts | 28 ++ yarn.lock | 11 + 11 files changed, 1027 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/read_kibana_config.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts diff --git a/package.json b/package.json index ab21e9805a490..bbc71019133c3 100644 --- a/package.json +++ b/package.json @@ -939,6 +939,7 @@ "get-port": "^5.0.0", "getopts": "^2.2.5", "getos": "^3.1.0", + "glob": "^10.3.10", "globby": "^11.1.0", "gpt-tokenizer": "^2.1.2", "handlebars": "4.7.8", @@ -1627,6 +1628,7 @@ "supertest": "^6.3.3", "supports-color": "^7.0.0", "svgo": "^2.8.0", + "table": "^6.8.1", "tape": "^5.0.1", "tempy": "^0.3.0", "terser": "^5.16.5", @@ -1652,4 +1654,4 @@ "yargs": "^15.4.1", "yarn-deduplicate": "^6.0.2" } -} \ No newline at 
end of file +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts new file mode 100644 index 0000000000000..d6a4d7cf78c58 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts @@ -0,0 +1,67 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { format, parse } from 'url'; +import { Argv } from 'yargs'; +import { readKibanaConfig } from './read_kibana_config'; + +export function options(y: Argv) { + const config = readKibanaConfig(); + + return y + .positional('grep', { + string: true as const, + array: true, + describe: 'A glob pattern for which scenarios to evaluate', + }) + .option('kibana', { + describe: 'Where Kibana is running', + string: true, + default: process.env.KIBANA_HOST || 'http://localhost:5601', + }) + .option('elasticsearch', { + alias: 'es', + describe: 'Where Elasticsearch is running', + string: true, + default: format({ + ...parse(config['elasticsearch.hosts']), + auth: `${config['elasticsearch.username']}:${config['elasticsearch.password']}`, + }), + }) + .option('connectorId', { + describe: 'The ID of the connector', + string: true, + }) + .option('persist', { + describe: + 'Whether the conversations should be stored. 
Adding this will generate a link at which the conversation can be opened.', + boolean: true, + default: false, + }) + .option('clear', { + describe: 'Clear conversations on startup', + boolean: true, + default: false, + }) + .option('autoTitle', { + describe: 'Whether to generate titles for new conversations', + boolean: true, + default: false, + }) + .option('logLevel', { + describe: 'Log level', + default: 'info', + }) + .check((argv) => { + if (!argv.persist && argv.clear) { + throw new Error('clear cannot be true if persist is false'); + } + if (!argv.persist && argv.autoTitle) { + throw new Error('autoTitle cannot be true if persist is false'); + } + return true; + }); +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts new file mode 100644 index 0000000000000..d554e6c70a2fc --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts @@ -0,0 +1,152 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { ToolingLog } from '@kbn/tooling-log'; +import { omit } from 'lodash'; +import fetch from 'node-fetch'; +import { format, parse, Url } from 'url'; + +async function discoverAuth(parsedTarget: Url, log: ToolingLog) { + const possibleCredentials = [`admin:changeme`, `elastic:changeme`]; + for (const auth of possibleCredentials) { + const url = format({ + ...parsedTarget, + auth, + }); + let status: number; + try { + log.debug(`Fetching ${url}`); + const response = await fetch(url); + status = response.status; + } catch (err) { + log.debug(`${url} resulted in ${err.message}`); + status = 0; + } + + if (status === 200) { + return auth; + } + } + + throw new Error(`Failed to authenticate user for ${format(parsedTarget)}`); +} + +async function getKibanaUrl({ kibana, log }: { kibana: string; log: ToolingLog }) { + try { + const isCI = process.env.CI?.toLowerCase() === 'true'; + + const parsedKibanaUrl = parse(kibana); + + const kibanaUrlWithoutAuth = format(omit(parsedKibanaUrl, 'auth')); + + log.debug(`Checking Kibana URL ${kibanaUrlWithoutAuth} for a redirect`); + + const unredirectedResponse = await fetch(kibanaUrlWithoutAuth, { + headers: { + ...(parsedKibanaUrl.auth + ? 
{ Authorization: `Basic ${Buffer.from(parsedKibanaUrl.auth).toString('base64')}` } + : {}), + }, + method: 'HEAD', + follow: 1, + redirect: 'manual', + }); + + log.debug('Unredirected response', unredirectedResponse.headers.get('location')); + + const discoveredKibanaUrl = + unredirectedResponse.headers + .get('location') + ?.replace('/spaces/enter', '') + ?.replace('spaces/space_selector', '') || kibanaUrlWithoutAuth; + + log.debug(`Discovered Kibana URL at ${discoveredKibanaUrl}`); + + const parsedTarget = parse(kibana); + + const parsedDiscoveredUrl = parse(discoveredKibanaUrl); + + const discoveredKibanaUrlWithAuth = format({ + ...parsedDiscoveredUrl, + auth: parsedTarget.auth, + }); + + const redirectedResponse = await fetch(discoveredKibanaUrlWithAuth, { + method: 'HEAD', + }); + + if (redirectedResponse.status !== 200) { + throw new Error( + `Expected HTTP 200 from ${discoveredKibanaUrlWithAuth}, got ${redirectedResponse.status}` + ); + } + + const discoveredKibanaUrlWithoutAuth = format({ + ...parsedDiscoveredUrl, + auth: undefined, + }); + + log.info( + `Discovered kibana running at: ${ + isCI ? 
discoveredKibanaUrlWithoutAuth : discoveredKibanaUrlWithAuth + }` + ); + + return discoveredKibanaUrlWithAuth.replace(/\/$/, ''); + } catch (error) { + throw new Error(`Could not connect to Kibana: ` + error.message); + } +} + +export async function getServiceUrls({ + log, + elasticsearch, + kibana, +}: { + elasticsearch: string; + kibana: string; + log: ToolingLog; +}) { + if (!elasticsearch) { + // assume things are running locally + kibana = kibana || 'http://127.0.0.1:5601'; + elasticsearch = 'http://127.0.0.1:9200'; + } + + if (!elasticsearch) { + throw new Error('Could not determine an Elasticsearch target'); + } + + const parsedTarget = parse(elasticsearch); + + let auth = parsedTarget.auth; + + if (!parsedTarget.auth) { + auth = await discoverAuth(parsedTarget, log); + } + + const formattedEsUrl = format({ + ...parsedTarget, + auth, + }); + + const suspectedKibanaUrl = kibana || elasticsearch.replace('.es', '.kb'); + + const parsedKibanaUrl = parse(suspectedKibanaUrl); + + const kibanaUrlWithAuth = format({ + ...parsedKibanaUrl, + auth, + }); + + const validatedKibanaUrl = await getKibanaUrl({ kibana: kibanaUrlWithAuth, log }); + + return { + kibanaUrl: validatedKibanaUrl, + esUrl: formattedEsUrl, + }; +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js new file mode 100644 index 0000000000000..efac843755ed4 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js @@ -0,0 +1,10 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +require('@kbn/babel-register').install(); +//eslint-disable-next-line @kbn/imports/uniform_imports +require('./index.ts'); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts new file mode 100644 index 0000000000000..4d8ae6c48261a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts @@ -0,0 +1,195 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import yargs from 'yargs'; +import { run } from '@kbn/dev-cli-runner'; +import { Client } from '@elastic/elasticsearch'; +import inquirer from 'inquirer'; +import * as glob from 'glob'; +import Path from 'path'; +import chalk from 'chalk'; +import * as table from 'table'; +import { castArray, omit, sortBy } from 'lodash'; +import { TableUserConfig } from 'table'; +import { format, parse } from 'url'; +import { options } from './cli'; +import { getServiceUrls } from './get_service_urls'; +import { KibanaClient } from './kibana_client'; +import { EvaluationFunction } from './types'; +import { MessageRole } from '../../common'; + +function runEvaluations() { + yargs(process.argv.slice(2)) + .command('*', 'Run AI Assistant evaluations', options, (argv) => { + run( + async ({ log }) => { + const serviceUrls = await getServiceUrls({ + log, + elasticsearch: argv.elasticsearch, + kibana: argv.kibana, + }); + + const kibanaClient = new KibanaClient(serviceUrls.kibanaUrl); + const esClient = new Client({ + node: serviceUrls.esUrl, + }); + + const connectors = await kibanaClient.getConnectors(); + + if (!connectors.length) { + throw new Error('No connectors found'); + } + + let connector = connectors.find((item) => item.id === argv.connectorId); + + if (!connector && 
argv.connectorId) { + log.warning(`Could not find connector ${argv.connectorId}`); + } + + if (!connector && connectors.length === 1) { + connector = connectors[0]; + log.debug('Using the only connector found'); + } else { + const connectorChoice = await inquirer.prompt({ + type: 'list', + name: 'connector', + message: 'Select a connector', + choices: connectors.map((item) => item.name), + }); + + connector = connectors.find((item) => item.name === connectorChoice.connector)!; + } + + log.info(`Using connector ${connector.id}`); + + const scenarios = + (argv.grep !== undefined && + castArray(argv.grep).map((file) => Path.join(process.cwd(), file))) || + glob.sync(Path.join(__dirname, './scenarios/**/*.ts')); + + if (!scenarios.length) { + throw new Error('No scenarios to run'); + } + + if (argv.clear) { + log.info('Clearing conversations'); + await esClient.deleteByQuery({ + index: '.kibana-observability-ai-assistant-conversations', + query: { + match_all: {}, + }, + refresh: true, + }); + } + + const evaluationFunctions: Array<{ + name: string; + fileName: string; + fn: EvaluationFunction; + }> = []; + + for (const fileName of scenarios) { + log.info(`Running scenario ${fileName}`); + const mod = await import(fileName); + Object.keys(mod).forEach((key) => { + evaluationFunctions.push({ name: key, fileName, fn: mod[key] }); + }); + } + + const header: string[][] = [ + [chalk.bold('Criterion'), chalk.bold('Result'), chalk.bold('Reasoning')], + ]; + + const tableConfig: TableUserConfig = { + singleLine: false, + border: { + topBody: `─`, + topJoin: `┬`, + topLeft: `┌`, + topRight: `┐`, + + bottomBody: `─`, + bottomJoin: `┴`, + bottomLeft: `└`, + bottomRight: `┘`, + + bodyLeft: `│`, + bodyRight: `│`, + bodyJoin: `│`, + + joinBody: `─`, + joinLeft: `├`, + joinRight: `┤`, + joinJoin: `┼`, + }, + spanningCells: [ + { row: 0, col: 0, colSpan: 3 }, + { row: 1, col: 0, colSpan: 3 }, + ], + columns: [ + { wrapWord: true, width: 60 }, + { wrapWord: true }, + { wrapWord: true, 
width: 60 }, + ], + }; + + const sortedEvaluationFunctions = sortBy(evaluationFunctions, 'fileName', 'name'); + + for (const { name, fn } of sortedEvaluationFunctions) { + log.debug(`Executing ${name}`); + const result = await fn({ + esClient, + kibanaClient, + chatClient: kibanaClient.createChatClient({ + connectorId: connector.id!, + persist: argv.persist, + title: argv.autoTitle ? undefined : name, + }), + }); + log.debug(`Result:`, JSON.stringify(result)); + const output: string[][] = [ + [ + result.messages.find((message) => message.role === MessageRole.User)!.content!, + '', + '', + ], + result.conversationId + ? [ + `${format( + omit(parse(serviceUrls.kibanaUrl), 'auth') + )}/app/observabilityAIAssistant/conversations/${result.conversationId}`, + '', + '', + ] + : ['', '', ''], + ...header, + ]; + + result.scores.forEach((score) => { + output.push([ + score.criterion, + score.score === 0 ? chalk.redBright('failed') : chalk.greenBright('passed'), + score.reasoning, + ]); + }); + log.write(table.table(output, tableConfig)); + } + }, + { + log: { + defaultLevel: argv.logLevel as any, + }, + flags: { + allowUnexpected: true, + }, + } + ); + }) + .parse(); +} + +runEvaluations(); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts new file mode 100644 index 0000000000000..bb45759c398e3 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts @@ -0,0 +1,324 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import axios, { AxiosInstance, AxiosResponse } from 'axios'; +import { pick } from 'lodash'; +import { filter, lastValueFrom, map, tap, toArray } from 'rxjs'; +import { format, parse, UrlObject } from 'url'; +import { Message, MessageRole } from '../../common'; +import { + ChatCompletionErrorCode, + ConversationCompletionError, + ConversationCreateEvent, + MessageAddEvent, + StreamingChatResponseEvent, + StreamingChatResponseEventType, +} from '../../common/conversation_complete'; +import { FunctionDefinition } from '../../common/types'; +import { concatenateOpenAiChunks } from '../../common/utils/concatenate_openai_chunks'; +import { processOpenAiStream } from '../../common/utils/process_openai_stream'; +import { APIReturnType, ObservabilityAIAssistantAPIClientRequestParamsOf } from '../../public'; +import { getAssistantSetupMessage } from '../../public/service/get_assistant_setup_message'; +import { streamIntoObservable } from '../../server/service/util/stream_into_observable'; +import { EvaluationResult } from './types'; + +type InnerMessage = Message['message']; +type StringOrMessageList = string | InnerMessage[]; + +interface ChatClient { + chat: (message: StringOrMessageList) => Promise; + complete: ( + ...args: [StringOrMessageList] | [string, InnerMessage[]] + ) => Promise<{ conversationId?: string; messages: InnerMessage[] }>; + + evaluate: ( + {}: { conversationId?: string; messages: InnerMessage[] }, + criteria: string[] + ) => Promise; +} + +export class KibanaClient { + axios: AxiosInstance; + constructor(private readonly url: string) { + this.axios = axios.create({ + headers: { + 'kbn-xsrf': 'foo', + }, + }); + } + + private getUrl(props: { query?: UrlObject['query']; pathname: string }) { + const parsed = parse(this.url); + + const baseUrl = parsed.pathname?.replaceAll('/', '') ?? ''; + + return format({ + ...parsed, + pathname: `/${[ + baseUrl, + props.pathname.startsWith('/') ? 
props.pathname.substring(1) : props.pathname, + ].join('/')}`, + query: props.query, + }); + } + + createChatClient({ + connectorId, + persist, + title, + }: { + connectorId: string; + persist: boolean; + title?: string; + }): ChatClient { + function getMessages(message: string | Array): Array { + if (typeof message === 'string') { + return [ + { + content: message, + role: MessageRole.User, + }, + ]; + } + return message; + } + + const that = this; + + async function getFunctions() { + const { + data: { functionDefinitions, contextDefinitions }, + }: AxiosResponse> = + await that.axios.get( + that.getUrl({ pathname: '/internal/observability_ai_assistant/functions' }) + ); + + return { functionDefinitions, contextDefinitions }; + } + + async function chat({ + messages, + functions, + functionCall, + }: { + messages: Message[]; + functions: FunctionDefinition[]; + functionCall?: string; + }) { + const params: ObservabilityAIAssistantAPIClientRequestParamsOf<'POST /internal/observability_ai_assistant/chat'>['params']['body'] = + { + messages, + connectorId, + functions: functions.map((fn) => pick(fn, 'name', 'description', 'parameters')), + functionCall, + }; + const stream$ = streamIntoObservable( + ( + await that.axios.post( + that.getUrl({ + pathname: '/internal/observability_ai_assistant/chat', + query: { stream: true }, + }), + params, + { responseType: 'stream' } + ) + ).data + ).pipe(processOpenAiStream(), concatenateOpenAiChunks()); + + const receivedMessage = await lastValueFrom(stream$); + + return receivedMessage.message; + } + + return { + chat: async (message) => { + const { functionDefinitions, contextDefinitions } = await getFunctions(); + const messages = [ + getAssistantSetupMessage({ contexts: contextDefinitions }), + ...getMessages(message).map((msg) => ({ + message: msg, + '@timestamp': new Date().toISOString(), + })), + ]; + return chat({ messages, functions: functionDefinitions }); + }, + complete: async (...args) => { + const messagesArg = 
args.length === 1 ? args[0] : args[1]; + const conversationId = args.length === 1 ? undefined : args[0]; + const { contextDefinitions } = await getFunctions(); + const messages = [ + getAssistantSetupMessage({ contexts: contextDefinitions }), + ...getMessages(messagesArg).map((msg) => ({ + message: msg, + '@timestamp': new Date().toISOString(), + })), + ]; + + const stream$ = streamIntoObservable( + ( + await that.axios.post( + that.getUrl({ + pathname: '/internal/observability_ai_assistant/chat/complete', + }), + { + conversationId, + messages, + connectorId, + persist, + title, + }, + { responseType: 'stream' } + ) + ).data + ).pipe( + map((line) => JSON.parse(line) as StreamingChatResponseEvent), + tap((event) => { + if (event.type === StreamingChatResponseEventType.ConversationCompletionError) { + throw new ConversationCompletionError( + event.error.code ?? ChatCompletionErrorCode.InternalError, + event.error.message + ); + } + }), + filter( + (event): event is MessageAddEvent | ConversationCreateEvent => + event.type === StreamingChatResponseEventType.MessageAdd || + event.type === StreamingChatResponseEventType.ConversationCreate + ), + toArray() + ); + + const events = await lastValueFrom(stream$); + + return { + messages: messages + .map((msg) => msg.message) + .concat( + events + .filter( + (event): event is MessageAddEvent => + event.type === StreamingChatResponseEventType.MessageAdd + ) + .map((event) => event.message.message) + ), + conversationId: + conversationId || + events.find( + (event): event is ConversationCreateEvent => + event.type === StreamingChatResponseEventType.ConversationCreate + )?.conversation.id, + }; + }, + evaluate: async ({ messages, conversationId }, criteria) => { + const message = await chat({ + messages: [ + { + '@timestamp': new Date().toISOString(), + message: { + role: MessageRole.System, + content: `You are a critical assistant for evaluating conversations with the Elastic Observability AI Assistant, + which helps our 
users make sense of their Observability data. + + Your goal is to verify whether a conversation between the user and the assistant matches the given criteria. + + For each criterion, calculate a score. Explain your score, by describing what the assistant did right, and what the + assistant did wrong, where it could improve, and what the root cause was in case of a failure.`, + }, + }, + { + '@timestamp': new Date().toString(), + message: { + role: MessageRole.User, + content: `Evaluate the conversation according to the following criteria: + + ${criteria.map((criterion, index) => { + return `${index}: ${criterion}`; + })} + + This is the conversation: + + ${JSON.stringify(messages)}`, + }, + }, + ], + functions: [ + { + name: 'scores', + parameters: { + type: 'object', + properties: { + criteria: { + type: 'array', + items: { + type: 'object', + properties: { + index: { + type: 'number', + description: 'The number of the criterion', + }, + score: { + type: 'number', + description: + 'A score of either 0 (criterion failed) or 1 (criterion succeeded)', + }, + reasoning: { + type: 'string', + description: + 'Your reasoning for the score. 
Explain your score by mentioning what you expected to happen and what did happen.', + }, + }, + required: ['index', 'score', 'reasoning'], + }, + }, + }, + required: ['criteria'], + }, + contexts: [], + description: 'Call this function to return scores for the criteria', + }, + ], + functionCall: 'scores', + }); + + return { + conversationId, + messages, + scores: ( + JSON.parse(message.function_call.arguments) as { + criteria: Array<{ index: number; score: number; reasoning: string }>; + } + ).criteria.map(({ index, score, reasoning }) => { + return { + criterion: criteria[index], + score, + reasoning, + }; + }), + }; + }, + }; + } + + async getConnectors() { + const connectors: AxiosResponse< + Array<{ + id: string; + connector_type_id: string; + name: string; + is_preconfigured: boolean; + is_deprecated: boolean; + referenced_by_count: number; + }> + > = await axios.get( + this.getUrl({ + pathname: '/api/actions/connectors', + }) + ); + + return connectors.data.filter((connector) => connector.connector_type_id === '.gen-ai'); + } +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/read_kibana_config.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/read_kibana_config.ts new file mode 100644 index 0000000000000..5b64bb2f56189 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/read_kibana_config.ts @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import path from 'path'; +import fs from 'fs'; +import yaml from 'js-yaml'; +import { identity, pickBy } from 'lodash'; + +export type KibanaConfig = ReturnType; + +export const readKibanaConfig = () => { + const kibanaConfigDir = path.join(__filename, '../../../../../../config'); + const kibanaDevConfig = path.join(kibanaConfigDir, 'kibana.dev.yml'); + const kibanaConfig = path.join(kibanaConfigDir, 'kibana.yml'); + + const loadedKibanaConfig = (yaml.safeLoad( + fs.readFileSync(fs.existsSync(kibanaDevConfig) ? kibanaDevConfig : kibanaConfig, 'utf8') + ) || {}) as {}; + + const cliEsCredentials = pickBy( + { + 'elasticsearch.username': process.env.ELASTICSEARCH_USERNAME, + 'elasticsearch.password': process.env.ELASTICSEARCH_PASSWORD, + 'elasticsearch.hosts': process.env.ELASTICSEARCH_HOST, + }, + identity + ) as { + 'elasticsearch.username'?: string; + 'elasticsearch.password'?: string; + 'elasticsearch.hosts'?: string; + }; + + return { + 'elasticsearch.hosts': 'http://localhost:9200', + 'elasticsearch.username': 'elastic', + 'elasticsearch.password': 'changeme', + ...loadedKibanaConfig, + ...cliEsCredentials, + }; +}; diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts new file mode 100644 index 0000000000000..1f73f9a9a625d --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { EvaluationFunction } from '../../types'; + +export const health: EvaluationFunction = async ({ chatClient }) => { + const conversation = await chatClient.complete( + 'Can you tell me what the state of my Elasticsearch cluster is?' + ); + + const evaluation = await chatClient.evaluate(conversation, [ + 'Calls the Elasticsearch function with method: GET and path: _cluster/health', + 'Describes the cluster status based on the response from the Elasticsearch function', + ]); + + return evaluation; +}; diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts new file mode 100644 index 0000000000000..99852c2e5e706 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts @@ -0,0 +1,172 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { last } from 'lodash'; +import { MessageRole } from '../../../../common'; +import { EvaluationFunction } from '../../types'; + +function extractEsqlQuery(response: string) { + return response.match(/```esql([\s\S]*?)```/)?.[1]; +} + +function createEsqlQueryEvaluation({ + question, + expected, + criteria = [], + execute = true, +}: { + question: string; + expected?: string; + criteria?: string[]; + execute?: boolean; +}): EvaluationFunction { + return async ({ chatClient }) => { + let conversation = await chatClient.complete(question); + + const esqlQuery = extractEsqlQuery(last(conversation.messages)?.content || ''); + + if (esqlQuery && execute) { + conversation = await chatClient.complete( + conversation.conversationId!, + conversation.messages.concat({ + content: '', + role: MessageRole.Assistant, + function_call: { + name: 'execute_query', + arguments: JSON.stringify({ + query: esqlQuery, + }), + trigger: MessageRole.User, + }, + }) + ); + } + + const evaluation = await chatClient.evaluate(conversation, [ + ...(expected + ? [ + `Returns a ES|QL query that is functionally equivalent to: + ${expected}`, + ] + : []), + ...(execute && expected ? 
[`The query successfully executed without an error`] : []), + ...criteria, + ]); + + return evaluation; + }; +} + +export const metricsApmQuery = createEsqlQueryEvaluation({ + question: + 'I want to see a query for metrics-apm*, filtering on metricset.name:transaction and metricset.interval:1m, showing the average duration (via transaction.duration.histogram), in 50 buckets.', + expected: `FROM metrics-apm* + | WHERE metricset.name == "transaction" AND metricset.interval == "1m" + | EVAL bucket = AUTO_BUCKET(@timestamp, 50, , ) + | STATS avg_duration = AVG(transaction.duration.histogram) BY bucket`, +}); + +export const packetbeatUniqueDomainsQuery = createEsqlQueryEvaluation({ + question: + 'For standard Elastic ECS compliant packetbeat data view, create an ES|QL query that shows the top 10 unique domains by doc count', + expected: `FROM packetbeat-* + | STATS doc_count = COUNT(destination.domain) BY destination.domain + | SORT doc_count DESC + | LIMIT 10`, +}); + +export const fiveEarliestEmployeesQuery = createEsqlQueryEvaluation({ + question: + 'From employees, I want to see the 5 earliest employees (hire_date), I want to display only the month and the year that they were hired in and their employee number (emp_no). Format the date as e.g. "September 2019".', + expected: `FROM employees + | EVAL hire_date_formatted = DATE_FORMAT(hire_date, ""MMMM yyyy"") + | SORT hire_date + | KEEP emp_no, hire_date_formatted + | LIMIT 5`, + execute: false, +}); + +export const employeesWithPaginationQuery = createEsqlQueryEvaluation({ + question: + 'From employees, I want to sort the documents by salary, and then return 10 results per page, and then see the second page', + criteria: ['The assistant should mention that pagination is currently not supported in ES|QL'], +}); + +export const logsAvgCpuQuery = createEsqlQueryEvaluation({ + question: + 'My logs data (ECS) is in `logs-*`. 
Show me a query that gets the average CPU per host, limit it to the top 10 results, in 1m buckets, and only include the last 15m. ', + expected: `FROM logs-* + | WHERE @timestamp >= NOW() - 15 minutes + | EVAL bucket = DATE_TRUNC(1 minute, @timestamp) + | STATS avg_cpu = AVG(system.cpu.total.norm.pct) BY bucket, host.name + | LIMIT 10`, +}); + +export const apmServiceInventoryQuery = createEsqlQueryEvaluation({ + question: + 'I want to show a list of services with APM data. My data is in `traces-apm*`. I want to show the average transaction duration, the success rate (by dividing event.outcome:failure by event.outcome:failure+success), and total amount of requests. As a time range, select the last 24 hours. Use ES|QL.', + expected: `FROM traces-apm* + | WHERE @timestamp >= NOW() - 24 hours + | EVAL successful = CASE(event.outcome == "success", 1, 0), + failed = CASE(event.outcome == "failure", 1, 0) + | STATS success_rate = AVG(successful), + avg_duration = AVG(transaction.duration), + total_requests = COUNT(transaction.id) BY service.name`, +}); + +export const metricbeatCpuQuery = createEsqlQueryEvaluation({ + question: `from \`metricbeat*\`, I want to see the percentage of CPU time normalized by the number of CPU cores, broken down by hostname. 
the fields are system.cpu.user.pct, system.cpu.system.pct, and system.cpu.cores`, + expected: `FROM metricbeat* + | EVAL cpu_pct_normalized = (system.cpu.user.pct + system.cpu.system.pct) / system.cpu.cores + | STATS AVG(cpu_pct_normalized) BY host.name`, +}); + +export const postgresDurationQuery = createEsqlQueryEvaluation({ + question: 'extract the query duration from postgres log messages, and calculate the avg', + expected: `FROM postgres-logs + | DISSECT message "%{} duration: %{query_duration} ms" + | EVAL query_duration_num = TO_DOUBLE(query_duration) + | STATS avg_duration = AVG(query_duration_num)`, +}); + +export const apmExitSpanQuery = createEsqlQueryEvaluation({ + question: `I've got APM data in \`metrics-apm\`. Filter on \`metricset.name:service_destination\` and the last 24 hours. Break down by span.destination.service.resource. Each document contains the count of total events (span.destination.service.response_time.count) for that document's interval and the total amount of latency (span.destination.service.response_time.sum.us). A document either contains an aggregate of failed events (event.outcome:success) or failed events (event.outcome:failure). A single document might represent multiple failures or successes, depending on the value of span.destination.service.response_time.count. For each value of span.destination.service.resource, give me the average throughput, latency per request, and failure rate, as a value between 0 and 1. 
Just show me the query.`, + expected: `FROM metrics-apm + | WHERE metricset.name == "service_destination" AND @timestamp >= NOW() - 24 hours + | EVAL total_response_time = span.destination.service.response_time.sum.us / span.destination.service.response_time.count, total_failures = CASE(event.outcome == "failure", 1, 0) * span.destination.service.response_time.count + | STATS + avg_throughput = AVG(span.destination.service.response_time.count), + avg_latency = AVG(total_response_time), + failure_rate = AVG(total_failures) + BY span.destination.service.resource`, +}); + +export const highCardinalityLogsErrorQuery = createEsqlQueryEvaluation({ + question: `i have logs in high-cardinality-data-fake_stack.admin-console-* . errors are found when log.level contais the value ERROR. generate a query to obtain the error rate as a percetage of the total logs per day for the last 7 days`, + expected: `FROM high-cardinality-data-fake_stack.admin-console-* + | WHERE @timestamp >= NOW() - 7 days + | EVAL error = CASE(log.level == "ERROR", 1, 0), total = 1 + | EVAL bucket = DATE_TRUNC(1 day, @timestamp) + | STATS total_errors = SUM(error), total_logs = SUM(total) BY bucket + | EVAL error_rate = (total_errors / total_logs) * 100`, +}); + +export const nycTaxisDropoffTimeQuery = createEsqlQueryEvaluation({ + question: + 'From `nyc_taxis`, give me a query that shows the top 10 results where the drop off time was between 6am and 10am.', + expected: `FROM nyc_taxis + | WHERE DATE_EXTRACT(drop_off_time, "hour") >= 6 AND DATE_EXTRACT(drop_off_time, "hour") < 10 + | LIMIT 10`, +}); + +export const apmTraceDurationQuery = createEsqlQueryEvaluation({ + question: + 'My APM data is in `traces-apm*`. 
What’s the average for `transaction.duration.us` per service over the last hour?', + expected: `FROM traces-apm* + | WHERE @timestamp > NOW() - 1 hour + | STATS AVG(transaction.duration.us) BY service.name`, +}); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts new file mode 100644 index 0000000000000..3ee8c2eaebe99 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { Client } from '@elastic/elasticsearch'; +import { Message } from '../../common'; +import { KibanaClient } from './kibana_client'; + +export interface ScenarioOptions { + esClient: Client; + kibanaClient: KibanaClient; + chatClient: ReturnType; +} + +export interface EvaluationResult { + conversationId?: string; + messages: Array; + scores: Array<{ + criterion: string; + reasoning: string; + score: number; + }>; +} + +export type EvaluationFunction = (options: ScenarioOptions) => Promise; diff --git a/yarn.lock b/yarn.lock index dd4e1988b5727..318aabf69c180 100644 --- a/yarn.lock +++ b/yarn.lock @@ -28565,6 +28565,17 @@ table@^6.8.0: string-width "^4.2.3" strip-ansi "^6.0.1" +table@^6.8.1: + version "6.8.1" + resolved "https://registry.yarnpkg.com/table/-/table-6.8.1.tgz#ea2b71359fe03b017a5fbc296204471158080bdf" + integrity sha512-Y4X9zqrCftUhMeH2EptSSERdVKt/nEdijTOacGD/97EKjhQ/Qs8RTlEGABSJNNN8lac9kheH+af7yAkEWlgneA== + dependencies: + ajv "^8.0.1" + lodash.truncate "^4.4.2" + slice-ansi "^4.0.0" + string-width "^4.2.3" + strip-ansi "^6.0.1" + tapable@^1.0.0, tapable@^1.1.3: version "1.1.3" resolved 
"https://registry.yarnpkg.com/tapable/-/tapable-1.1.3.tgz#a1fccc06b58db61fd7a45da2da44f5f3a3e67ba2" From 6c9a3fcebc7f22df6d35c3f19817f46ce17eb29e Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Sun, 10 Dec 2023 11:12:26 +0100 Subject: [PATCH 03/13] README.md --- package.json | 2 +- .../scripts/evaluation/README.md | 37 +++++++++++++++++++ .../scripts/evaluation/cli.ts | 4 +- .../scripts/evaluation/index.ts | 4 +- 4 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md diff --git a/package.json b/package.json index 56fa8c01b3ca4..69466a6ddc0f1 100644 --- a/package.json +++ b/package.json @@ -1661,4 +1661,4 @@ "yargs": "^15.4.1", "yarn-deduplicate": "^6.0.2" } -} +} \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md new file mode 100644 index 0000000000000..e011b1cd4c7ec --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md @@ -0,0 +1,37 @@ +# Observability AI Assistant Evaluation Framework + +## Overview + +This tool is developed for our team working on the Elastic Observability platform, specifically focusing on evaluating the Observability AI Assistant. It simplifies scripting and evaluating various scenarios with the Large Language Model (LLM) integration. + +## Setup requirements + +- An Elasticsearch instance +- A Kibana instance +- At least one .gen-ai connector set up + +## Running evaluations + +Run the tool using: + +`$ node x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js` + +This will evaluate all existing scenarios, and write the evaluation results to the terminal. 
+ +### Configuration + +#### Kibana and Elasticsearch + +By default, the tool will look for a Kibana instance running locally (at `http://localhost:5601`, which is the default address for running Kibana in development mode). It will also attempt to read the Kibana config file for the Elasticsearch address & credentials. If you want to override these settings, use `--kibana` and `--es`. Only basic auth is supported, e.g. `--kibana http://username:password@localhost:5601`. + +#### Connector + +Use `--connectorId` to specify a `.gen-ai` connector to use. If none are given, it will prompt you to select a connector based on the ones that are available. If only a single `.gen-ai` connector is found, it will be used without prompting. + +#### Persisting conversations + +By default, completed conversations are not persisted. If you do want to persist them, for instance for reviewing purposes, set the `--persist` flag to store them. This will also generate a clickable link in the output of the evaluation that takes you to the conversation. + +If you want to clear conversations on startup, use the `--clear` flag. This only works when `--persist` is enabled. + +When storing conversations, the name of the scenario is used as a title. Set the `--autoTitle` flag to have the LLM generate a title for you. diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts index d6a4d7cf78c58..530ca511b6e0d 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts @@ -12,10 +12,10 @@ export function options(y: Argv) { const config = readKibanaConfig(); return y - .positional('grep', { + .positional('files', { string: true as const, array: true, - describe: 'A glob pattern for which scenarios to evaluate', + describe: 'A file or list of files containing the scenarios to evaluate. 
Defaults to all', }) .option('kibana', { describe: 'Where Kibana is running', diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts index 4d8ae6c48261a..a918a918c3cd2 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts @@ -67,8 +67,8 @@ function runEvaluations() { log.info(`Using connector ${connector.id}`); const scenarios = - (argv.grep !== undefined && - castArray(argv.grep).map((file) => Path.join(process.cwd(), file))) || + (argv.files !== undefined && + castArray(argv.files).map((file) => Path.join(process.cwd(), file))) || glob.sync(Path.join(__dirname, './scenarios/**/*.ts')); if (!scenarios.length) { From 2c6ad2972dd334638794bfa5120ec39ccc892842 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Tue, 12 Dec 2023 13:00:32 +0100 Subject: [PATCH 04/13] Add --grep option --- .../observability_ai_assistant/scripts/evaluation/cli.ts | 7 ++++++- .../scripts/evaluation/index.ts | 9 ++++++++- .../scripts/evaluation/scenarios/esql/index.ts | 5 +++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts index 530ca511b6e0d..3836b41b622f4 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts @@ -12,11 +12,16 @@ export function options(y: Argv) { const config = readKibanaConfig(); return y - .positional('files', { + .option('files', { string: true as const, array: true, describe: 'A file or list of files containing the scenarios to evaluate. 
Defaults to all', }) + .option('grep', { + string: true, + array: false, + describe: 'A string or regex to filter scenarios by', + }) .option('kibana', { describe: 'Where Kibana is running', string: true, diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts index a918a918c3cd2..721e73696e276 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts @@ -86,7 +86,7 @@ function runEvaluations() { }); } - const evaluationFunctions: Array<{ + let evaluationFunctions: Array<{ name: string; fileName: string; fn: EvaluationFunction; @@ -100,6 +100,13 @@ function runEvaluations() { }); } + if (argv.grep) { + const lc = argv.grep.toLowerCase(); + evaluationFunctions = evaluationFunctions.filter((fn) => + fn.name.toLowerCase().includes(lc) + ); + } + const header: string[][] = [ [chalk.bold('Criterion'), chalk.bold('Result'), chalk.bold('Reasoning')], ]; diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts index 99852c2e5e706..c55df6e08849b 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts @@ -119,14 +119,15 @@ export const apmServiceInventoryQuery = createEsqlQueryEvaluation({ }); export const metricbeatCpuQuery = createEsqlQueryEvaluation({ - question: `from \`metricbeat*\`, I want to see the percentage of CPU time normalized by the number of CPU cores, broken down by hostname. the fields are system.cpu.user.pct, system.cpu.system.pct, and system.cpu.cores`, + question: `from \`metricbeat*\`, using ES|QL, I want to see the percentage of CPU time normalized by the number of CPU cores, broken down by hostname. 
the fields are system.cpu.user.pct, system.cpu.system.pct, and system.cpu.cores`, expected: `FROM metricbeat* | EVAL cpu_pct_normalized = (system.cpu.user.pct + system.cpu.system.pct) / system.cpu.cores | STATS AVG(cpu_pct_normalized) BY host.name`, }); export const postgresDurationQuery = createEsqlQueryEvaluation({ - question: 'extract the query duration from postgres log messages, and calculate the avg', + question: + 'extract the query duration from postgres log messages in postgres-logs*, using ECS fields, and calculate the avg', expected: `FROM postgres-logs | DISSECT message "%{} duration: %{query_duration} ms" | EVAL query_duration_num = TO_DOUBLE(query_duration) From 5fbd4a3160e1439c87eb4b0c767c9f1d1e188442 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Tue, 12 Dec 2023 13:34:25 +0100 Subject: [PATCH 05/13] Fix types --- .../common/utils/concatenate_openai_chunks.ts | 17 +++++++++++++++-- .../server/service/client/index.ts | 3 +++ .../observability_ai_assistant/tsconfig.json | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/observability_ai_assistant/common/utils/concatenate_openai_chunks.ts b/x-pack/plugins/observability_ai_assistant/common/utils/concatenate_openai_chunks.ts index f15a193908a4e..e14b4cce868f1 100644 --- a/x-pack/plugins/observability_ai_assistant/common/utils/concatenate_openai_chunks.ts +++ b/x-pack/plugins/observability_ai_assistant/common/utils/concatenate_openai_chunks.ts @@ -7,10 +7,23 @@ import { cloneDeep } from 'lodash'; import { type Observable, scan } from 'rxjs'; -import { CreateChatCompletionResponseChunk, MessageRole } from '../types'; +import { type CreateChatCompletionResponseChunk, MessageRole } from '../types'; export const concatenateOpenAiChunks = - () => (source: Observable) => + () => + ( + source: Observable + ): Observable<{ + message: { + content: string; + role: MessageRole; + function_call: { + name: string; + arguments: string; + trigger: MessageRole.Assistant | 
MessageRole.User; + }; + }; + }> => source.pipe( scan( (acc, { choices }) => { diff --git a/x-pack/plugins/observability_ai_assistant/server/service/client/index.ts b/x-pack/plugins/observability_ai_assistant/server/service/client/index.ts index c111af3d92d48..9423977428d66 100644 --- a/x-pack/plugins/observability_ai_assistant/server/service/client/index.ts +++ b/x-pack/plugins/observability_ai_assistant/server/service/client/index.ts @@ -406,6 +406,9 @@ export class ObservabilityAIAssistantClient { function_call: functionCall ? { name: functionCall } : undefined, }; + this.dependencies.logger.debug(`Sending conversation to connector`); + this.dependencies.logger.debug(JSON.stringify(request, null, 2)); + const executeResult = await this.dependencies.actionsClient.execute({ actionId: connectorId, params: { diff --git a/x-pack/plugins/observability_ai_assistant/tsconfig.json b/x-pack/plugins/observability_ai_assistant/tsconfig.json index 03c1fddb28ecf..b3123c293d681 100644 --- a/x-pack/plugins/observability_ai_assistant/tsconfig.json +++ b/x-pack/plugins/observability_ai_assistant/tsconfig.json @@ -7,6 +7,7 @@ "../../../typings/**/*", "common/**/*", "public/**/*", + "scripts/**/*", "typings/**/*", "public/**/*.json", "server/**/*" From d908e35995d2688e265af2118b1b66f310949c95 Mon Sep 17 00:00:00 2001 From: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Date: Tue, 12 Dec 2023 12:42:52 +0000 Subject: [PATCH 06/13] [CI] Auto-commit changed files from 'node scripts/lint_ts_projects --fix' --- x-pack/plugins/observability_ai_assistant/tsconfig.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/x-pack/plugins/observability_ai_assistant/tsconfig.json b/x-pack/plugins/observability_ai_assistant/tsconfig.json index b3123c293d681..d1facbb985cb5 100644 --- a/x-pack/plugins/observability_ai_assistant/tsconfig.json +++ b/x-pack/plugins/observability_ai_assistant/tsconfig.json @@ -49,7 +49,10 @@ "@kbn/licensing-plugin", 
"@kbn/share-plugin", "@kbn/utility-types-jest", - "@kbn/analytics-client" + "@kbn/analytics-client", + "@kbn/tooling-log", + "@kbn/babel-register", + "@kbn/dev-cli-runner" ], "exclude": ["target/**/*"] } From 76d87348d8deb30bf2a566ddeb4f9e6124e3c135 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Tue, 12 Dec 2023 14:12:50 +0100 Subject: [PATCH 07/13] Add --spaceId option --- .../scripts/evaluation/README.md | 4 ++-- .../scripts/evaluation/cli.ts | 6 ++++++ .../scripts/evaluation/{index.ts => evaluation.ts} | 10 +++++----- .../scripts/evaluation/kibana_client.ts | 7 +++++-- 4 files changed, 18 insertions(+), 9 deletions(-) rename x-pack/plugins/observability_ai_assistant/scripts/evaluation/{index.ts => evaluation.ts} (94%) diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md index e011b1cd4c7ec..c5ff90ed582f2 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/README.md @@ -22,7 +22,7 @@ This will evaluate all existing scenarios, and write the evaluation results to t #### Kibana and Elasticsearch -By default, the tool will look for a Kibana instance running locally (at `http://localhost:5601`, which is the default address for running Kibana in development mode). It will also attempt to read the Kibana config file for the Elasticsearch address & credentials. If you want to override these settings, use `--kibana` and `--es`. Only basic auth is supported, e.g. `--kibana http://username:password@localhost:5601`. +By default, the tool will look for a Kibana instance running locally (at `http://localhost:5601`, which is the default address for running Kibana in development mode). It will also attempt to read the Kibana config file for the Elasticsearch address & credentials. If you want to override these settings, use `--kibana` and `--es`. 
Only basic auth is supported, e.g. `--kibana http://username:password@localhost:5601`. If you want to use a specific space, use `--spaceId`. #### Connector @@ -32,6 +32,6 @@ Use `--connectorId` to specify a `.gen-ai` connector to use. If none are given, By default, completed conversations are not persisted. If you do want to persist them, for instance for reviewing purposes, set the `--persist` flag to store them. This will also generate a clickable link in the output of the evaluation that takes you to the conversation. -If you want to clear conversations on startup, use the `--clear` flag. This only works when `--persist` is enabled. +If you want to clear conversations on startup, use the `--clear` flag. This only works when `--persist` is enabled. If `--spaceId` is set, only conversations in that space will be cleared. When storing conversations, the name of the scenario is used as a title. Set the `--autoTitle` flag to have the LLM generate a title for you. diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts index 3836b41b622f4..fe4fb7ec6e69d 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/cli.ts @@ -27,6 +27,12 @@ export function options(y: Argv) { string: true, default: process.env.KIBANA_HOST || 'http://localhost:5601', }) + .option('spaceId', { + describe: + 'The space to use. 
If space is set, conversations will only be cleared for that spaceId', + string: true, + array: false, + }) .option('elasticsearch', { alias: 'es', describe: 'Where Elasticsearch is running', diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts similarity index 94% rename from x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts rename to x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts index 721e73696e276..7f62e4fc24a1e 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts @@ -33,7 +33,7 @@ function runEvaluations() { kibana: argv.kibana, }); - const kibanaClient = new KibanaClient(serviceUrls.kibanaUrl); + const kibanaClient = new KibanaClient(serviceUrls.kibanaUrl, argv.spaceId); const esClient = new Client({ node: serviceUrls.esUrl, }); @@ -80,7 +80,7 @@ function runEvaluations() { await esClient.deleteByQuery({ index: '.kibana-observability-ai-assistant-conversations', query: { - match_all: {}, + ...(argv.spaceId ? { term: { namespace: argv.spaceId } } : { match_all: {} }), }, refresh: true, }); @@ -166,9 +166,9 @@ function runEvaluations() { ], result.conversationId ? [ - `${format( - omit(parse(serviceUrls.kibanaUrl), 'auth') - )}/app/observabilityAIAssistant/conversations/${result.conversationId}`, + `${format(omit(parse(serviceUrls.kibanaUrl), 'auth'))}/${ + argv.spaceId ? 
`s/${argv.spaceId}/` : '' + }app/observabilityAIAssistant/conversations/${result.conversationId}`, '', '', ] diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts index bb45759c398e3..7c447931f30de 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts @@ -43,7 +43,7 @@ interface ChatClient { export class KibanaClient { axios: AxiosInstance; - constructor(private readonly url: string) { + constructor(private readonly url: string, private readonly spaceId?: string) { this.axios = axios.create({ headers: { 'kbn-xsrf': 'foo', @@ -56,14 +56,17 @@ export class KibanaClient { const baseUrl = parsed.pathname?.replaceAll('/', '') ?? ''; - return format({ + const url = format({ ...parsed, pathname: `/${[ baseUrl, + ...(this.spaceId ? ['s', this.spaceId] : []), props.pathname.startsWith('/') ? 
props.pathname.substring(1) : props.pathname, ].join('/')}`, query: props.query, }); + + return url; } createChatClient({ From 97fe1aca3708382eceeda2080a1afaad8c46d1f7 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Tue, 12 Dec 2023 16:43:42 +0100 Subject: [PATCH 08/13] Replace glob with fast-glob to prevent type errors --- package.json | 4 ++-- .../scripts/evaluation/evaluation.ts | 4 ++-- yarn.lock | 11 +++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/package.json b/package.json index 69466a6ddc0f1..0cc8017919270 100644 --- a/package.json +++ b/package.json @@ -934,6 +934,7 @@ "exponential-backoff": "^3.1.1", "extract-zip": "^2.0.1", "fast-deep-equal": "^3.1.1", + "fast-glob": "^3.3.2", "fflate": "^0.6.9", "file-saver": "^1.3.8", "fnv-plus": "^1.3.1", @@ -944,7 +945,6 @@ "get-port": "^5.0.0", "getopts": "^2.2.5", "getos": "^3.1.0", - "glob": "^10.3.10", "globby": "^11.1.0", "gpt-tokenizer": "^2.1.2", "handlebars": "4.7.8", @@ -1661,4 +1661,4 @@ "yargs": "^15.4.1", "yarn-deduplicate": "^6.0.2" } -} \ No newline at end of file +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts index 7f62e4fc24a1e..5c51653036645 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts @@ -9,7 +9,7 @@ import yargs from 'yargs'; import { run } from '@kbn/dev-cli-runner'; import { Client } from '@elastic/elasticsearch'; import inquirer from 'inquirer'; -import * as glob from 'glob'; +import * as fastGlob from 'fast-glob'; import Path from 'path'; import chalk from 'chalk'; import * as table from 'table'; @@ -69,7 +69,7 @@ function runEvaluations() { const scenarios = (argv.files !== undefined && castArray(argv.files).map((file) => Path.join(process.cwd(), file))) || - glob.sync(Path.join(__dirname, './scenarios/**/*.ts')); + 
fastGlob.sync(Path.join(__dirname, './scenarios/**/*.ts')); if (!scenarios.length) { throw new Error('No scenarios to run'); diff --git a/yarn.lock b/yarn.lock index 1581bc726f815..e0da4d0cde045 100644 --- a/yarn.lock +++ b/yarn.lock @@ -16642,6 +16642,17 @@ fast-glob@^3.0.3, fast-glob@^3.1.1, fast-glob@^3.2.11, fast-glob@^3.2.2, fast-gl merge2 "^1.3.0" micromatch "^4.0.4" +fast-glob@^3.3.2: + version "3.3.2" + resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.3.2.tgz#a904501e57cfdd2ffcded45e99a54fef55e46129" + integrity sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow== + dependencies: + "@nodelib/fs.stat" "^2.0.2" + "@nodelib/fs.walk" "^1.2.3" + glob-parent "^5.1.2" + merge2 "^1.3.0" + micromatch "^4.0.4" + fast-json-parse@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/fast-json-parse/-/fast-json-parse-1.0.3.tgz#43e5c61ee4efa9265633046b770fb682a7577c4d" From a1edd53d177d764dca81dec8b472a4b8f75fcf6a Mon Sep 17 00:00:00 2001 From: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Date: Tue, 12 Dec 2023 16:29:29 +0000 Subject: [PATCH 09/13] [CI] Auto-commit changed files from 'node scripts/eslint --no-cache --fix' --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 0cc8017919270..1fd7ee1dfb83e 100644 --- a/package.json +++ b/package.json @@ -1661,4 +1661,4 @@ "yargs": "^15.4.1", "yarn-deduplicate": "^6.0.2" } -} +} \ No newline at end of file From decc8b75da593d71005de1a9a6f696ab257c5056 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Tue, 12 Dec 2023 17:29:34 +0100 Subject: [PATCH 10/13] Newlines --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 0cc8017919270..1fd7ee1dfb83e 100644 --- a/package.json +++ b/package.json @@ -1661,4 +1661,4 @@ "yargs": "^15.4.1", "yarn-deduplicate": "^6.0.2" } -} +} \ No newline at end of file From 
beaf320ec9ebd7c21f33038dfaa23c8eb5149c93 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Wed, 13 Dec 2023 09:00:32 +0100 Subject: [PATCH 11/13] Mock logger.debug() in tests --- .../server/service/client/index.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugins/observability_ai_assistant/server/service/client/index.test.ts b/x-pack/plugins/observability_ai_assistant/server/service/client/index.test.ts index 062377cda5112..0349e5ec899f9 100644 --- a/x-pack/plugins/observability_ai_assistant/server/service/client/index.test.ts +++ b/x-pack/plugins/observability_ai_assistant/server/service/client/index.test.ts @@ -90,6 +90,7 @@ describe('Observability AI Assistant service', () => { const loggerMock: DeeplyMockedKeys = { log: jest.fn(), error: jest.fn(), + debug: jest.fn(), } as any; const functionClientMock: DeeplyMockedKeys = { From 385da0ab25270bd4ab3e7f24b68b5d6881dc0a1f Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Wed, 13 Dec 2023 09:34:32 +0100 Subject: [PATCH 12/13] Lockfile changes --- yarn.lock | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/yarn.lock b/yarn.lock index e0da4d0cde045..86ecec944acad 100644 --- a/yarn.lock +++ b/yarn.lock @@ -16631,18 +16631,7 @@ fast-glob@^2.2.6: merge2 "^1.2.3" micromatch "^3.1.10" -fast-glob@^3.0.3, fast-glob@^3.1.1, fast-glob@^3.2.11, fast-glob@^3.2.2, fast-glob@^3.2.7, fast-glob@^3.2.9: - version "3.2.12" - resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.12.tgz#7f39ec99c2e6ab030337142da9e0c18f37afae80" - integrity sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w== - dependencies: - "@nodelib/fs.stat" "^2.0.2" - "@nodelib/fs.walk" "^1.2.3" - glob-parent "^5.1.2" - merge2 "^1.3.0" - micromatch "^4.0.4" - -fast-glob@^3.3.2: +fast-glob@^3.0.3, fast-glob@^3.1.1, fast-glob@^3.2.11, fast-glob@^3.2.2, fast-glob@^3.2.7, fast-glob@^3.2.9, fast-glob@^3.3.2: version "3.3.2" resolved 
"https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.3.2.tgz#a904501e57cfdd2ffcded45e99a54fef55e46129" integrity sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow== @@ -28613,18 +28602,7 @@ tabbable@^5.3.3: resolved "https://registry.yarnpkg.com/tabbable/-/tabbable-5.3.3.tgz#aac0ff88c73b22d6c3c5a50b1586310006b47fbf" integrity sha512-QD9qKY3StfbZqWOPLp0++pOrAVb/HbUi5xCc8cUo4XjP19808oaMiDzn0leBY5mCespIBM0CIZePzZjgzR83kA== -table@^6.8.0: - version "6.8.0" - resolved "https://registry.yarnpkg.com/table/-/table-6.8.0.tgz#87e28f14fa4321c3377ba286f07b79b281a3b3ca" - integrity sha512-s/fitrbVeEyHKFa7mFdkuQMWlH1Wgw/yEXMt5xACT4ZpzWFluehAxRtUUQKPuWhaLAWhFcVx6w3oC8VKaUfPGA== - dependencies: - ajv "^8.0.1" - lodash.truncate "^4.4.2" - slice-ansi "^4.0.0" - string-width "^4.2.3" - strip-ansi "^6.0.1" - -table@^6.8.1: +table@^6.8.0, table@^6.8.1: version "6.8.1" resolved "https://registry.yarnpkg.com/table/-/table-6.8.1.tgz#ea2b71359fe03b017a5fbc296204471158080bdf" integrity sha512-Y4X9zqrCftUhMeH2EptSSERdVKt/nEdijTOacGD/97EKjhQ/Qs8RTlEGABSJNNN8lac9kheH+af7yAkEWlgneA== From c32b8be233ca121a75c02a607caea0871865c290 Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Wed, 13 Dec 2023 10:24:10 +0100 Subject: [PATCH 13/13] linting errors --- .../observability_ai_assistant/scripts/evaluation/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js index efac843755ed4..963e1a2ecfbed 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js @@ -6,5 +6,5 @@ */ require('@kbn/babel-register').install(); -//eslint-disable-next-line @kbn/imports/uniform_imports -require('./index.ts'); + +require('./evaluation');