README.md
dgieselaar committed Dec 10, 2023
1 parent 1a18f39 commit 6c9a3fc
Showing 4 changed files with 42 additions and 5 deletions.
2 changes: 1 addition & 1 deletion package.json
@@ -1661,4 +1661,4 @@
     "yargs": "^15.4.1",
     "yarn-deduplicate": "^6.0.2"
   }
-}
+}
@@ -0,0 +1,37 @@
# Observability AI Assistant Evaluation Framework

## Overview

This tool is developed for our team working on the Elastic Observability platform, and focuses on evaluating the Observability AI Assistant. It simplifies scripting and evaluating scenarios that exercise the Large Language Model (LLM) integration.

## Setup requirements

- An Elasticsearch instance
- A Kibana instance
- At least one `.gen-ai` connector set up

## Running evaluations

Run the tool using:

`$ node x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js`

This evaluates all existing scenarios and writes the evaluation results to the terminal.
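To evaluate only a subset of scenarios, you can pass one or more scenario files as positional arguments. A sketch, assuming a scenario file exists at the path below (the filename is hypothetical):

```shell
# Evaluate a single scenario file instead of all scenarios.
# The scenario filename below is a hypothetical example.
node x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js \
  x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/example/index.ts
```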

### Configuration

#### Kibana and Elasticsearch

By default, the tool looks for a Kibana instance running locally (at `http://localhost:5601`, the default address for running Kibana in development mode). It also attempts to read the Kibana config file for the Elasticsearch address and credentials. If you want to override these settings, use `--kibana` and `--es`. Only basic auth is supported, e.g. `--kibana http://username:password@localhost:5601`.
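For example, to point the tool at a non-default Kibana and Elasticsearch instance (the credentials below are placeholders):

```shell
# Override the Kibana and Elasticsearch addresses.
# Only basic auth is supported; elastic/changeme are placeholder credentials.
node x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js \
  --kibana http://elastic:changeme@localhost:5601 \
  --es http://elastic:changeme@localhost:9200
```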

#### Connector

Use `--connectorId` to specify a `.gen-ai` connector to use. If none is given, you will be prompted to select one of the available connectors. If only a single `.gen-ai` connector is found, it is used without prompting.
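For example (the connector id below is a placeholder):

```shell
# Use a specific .gen-ai connector instead of being prompted to select one.
# "my-gen-ai-connector" is a placeholder id.
node x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js \
  --connectorId my-gen-ai-connector
```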

#### Persisting conversations

By default, completed conversations are not persisted. If you do want to persist them, for instance for review purposes, set the `--persist` flag. This will also add a clickable link to the evaluation output that takes you to the conversation.

If you want to clear conversations on startup, use the `--clear` flag. This only works when `--persist` is enabled.

When storing conversations, the name of the scenario is used as a title. Set the `--autoTitle` flag to have the LLM generate a title for you.
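Putting the persistence flags together (all flags are documented above; `--clear` only has effect alongside `--persist`):

```shell
# Persist conversations, clear previously stored conversations on startup,
# and have the LLM generate conversation titles.
node x-pack/plugins/observability_ai_assistant/scripts/evaluation/index.js \
  --persist --clear --autoTitle
```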
@@ -12,10 +12,10 @@ export function options(y: Argv) {
const config = readKibanaConfig();

return y
-    .positional('grep', {
+    .positional('files', {
       string: true as const,
       array: true,
-      describe: 'A glob pattern for which scenarios to evaluate',
+      describe: 'A file or list of files containing the scenarios to evaluate. Defaults to all',
})
.option('kibana', {
describe: 'Where Kibana is running',
@@ -67,8 +67,8 @@ function runEvaluations() {
log.info(`Using connector ${connector.id}`);

const scenarios =
-    (argv.grep !== undefined &&
-      castArray(argv.grep).map((file) => Path.join(process.cwd(), file))) ||
+    (argv.files !== undefined &&
+      castArray(argv.files).map((file) => Path.join(process.cwd(), file))) ||
glob.sync(Path.join(__dirname, './scenarios/**/*.ts'));

if (!scenarios.length) {
