[8.12] [Obs AI Assistant] Automatically execute ES|QL queries (#174081) (#174868)

# Backport

This will backport the following commits from `main` to `8.12`:
- [[Obs AI Assistant] Automatically execute ES|QL queries (#174081)](#174081)

### Questions?
Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

Co-authored-by: Dario Gieselaar <[email protected]>
elastic · Jan 15, 2024 · 81ca1e2 · 81ca1e2
1 parent 73b1a19
commit 81ca1e2
Show file tree

Hide file tree

Showing 91 changed files with 794 additions and 134 deletions.
diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.spec.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.spec.ts
@@ -7,48 +7,21 @@
 
 /// <reference types="@kbn/ambient-ftr-types"/>
 
-import { last } from 'lodash';
-import moment from 'moment';
 import { apm, timerange } from '@kbn/apm-synthtrace-client';
 import expect from '@kbn/expect';
-import { MessageRole } from '../../../../common';
+import moment from 'moment';
 import { chatClient, esClient, synthtraceEsClients } from '../../services';
 
-function extractEsqlQuery(response: string) {
-  return response.match(/```esql([\s\S]*?)```/)?.[1];
-}
-
 async function evaluateEsqlQuery({
   question,
   expected,
   criteria = [],
-  execute = true,
 }: {
   question: string;
   expected?: string;
   criteria?: string[];
-  execute?: boolean;
 }): Promise<void> {
-  let conversation = await chatClient.complete(question);
-
-  const esqlQuery = extractEsqlQuery(last(conversation.messages)?.content || '');
-
-  if (esqlQuery && execute) {
-    conversation = await chatClient.complete(
-      conversation.conversationId!,
-      conversation.messages.concat({
-        content: '',
-        role: MessageRole.Assistant,
-        function_call: {
-          name: 'execute_query',
-          arguments: JSON.stringify({
-            query: esqlQuery,
-          }),
-          trigger: MessageRole.User,
-        },
-      })
-    );
-  }
+  const conversation = await chatClient.complete(question);
 
   const evaluation = await chatClient.evaluate(conversation, [
     ...(expected
@@ -57,7 +30,7 @@ async function evaluateEsqlQuery({
       ${expected}`,
         ]
       : []),
-    ...(execute && expected ? [`The query successfully executed without an error`] : []),
+    ...(expected ? [`The query successfully executed without an error`] : []),
     ...criteria,
   ]);
 
@@ -146,7 +119,6 @@ describe('ES|QL query generation', () => {
           | SORT hire_date
           | KEEP emp_no, hire_date_formatted
           | LIMIT 5`,
-          execute: false,
         });
       });
 

diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/format_esql_examples.ts b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/format_esql_examples.ts
@@ -0,0 +1,19 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export function formatEsqlExamples(content: string) {
+  // Matches one query: optional leading whitespace, then "FROM ", "ROW " or "SHOW ", then lazily (the s flag lets . span newlines) up to a newline followed by a character that is neither a pipe nor whitespace, or up to the end of the input. NOTE(review): the keywords are not anchored to a line start, so prose containing e.g. "FROM " would match too - confirm this is acceptable.
+  const queryRegex = /(\s*(FROM |ROW |SHOW ).*?)(?=\n[^|\s]|$)/gs;
+
+  // Wraps a matched query in an esql-tagged fenced code block: the match is trimmed, and a newline is emitted before the opening fence and after the closing fence.
+  const formatQuery = (match: string) => {
+    return `\n\`\`\`esql\n${match.trim()}\n\`\`\`\n`;
+  };
+
+  // Returns a copy of content in which every match (g flag) is replaced by its fenced form; text outside the matches is left as-is.
+  return content.replace(queryRegex, formatQuery);
+}
diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts
@@ -15,6 +15,7 @@ import Path from 'path';
 import git, { SimpleGitProgressEvent } from 'simple-git';
 import yargs, { Argv } from 'yargs';
 import { extractSections } from './extract_sections';
+import { formatEsqlExamples } from './format_esql_examples';
 
 yargs(process.argv.slice(2))
   .command(
@@ -221,7 +222,19 @@ yargs(process.argv.slice(2))
                   outDir,
                   `esql-${doc.title.replaceAll(' ', '-').toLowerCase()}.txt`
                 );
-                await Fs.writeFile(fileName, doc.content);
+
+                // We ask the LLM to output queries wrapped in ```esql...```,
+                // so we try to format ES|QL examples in the docs in the same
+                // way. The hope is that this creates a stronger relation in the
+                // output.
+                const formattedContent = formatEsqlExamples(doc.content);
+
+                log.debug({
+                  content: doc.content,
+                  formattedContent,
+                });
+
+                await Fs.writeFile(fileName, formattedContent);
               })
             )
           );

diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-abs.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-abs.txt
@@ -1,8 +1,18 @@
 ABS
 
+Syntax
+Parameters
+n
+Numeric expression. If null, the function returns null.
+DescriptionReturns the absolute value.Supported types
+Examples
+```esql
+ROW number = -1.0
+| EVAL abs_number = ABS(number)
+```
 
-Returns the absolute value.
+```esql
 FROM employees
 | KEEP first_name, last_name, height
 | EVAL abs_height = ABS(0.0 - height)
-Supported types:
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-acos.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-acos.txt
@@ -7,5 +7,7 @@ Numeric expression. If null, the function returns null.
 DescriptionReturns the arccosine of n as an
 angle, expressed in radians.Supported types
 Example
+```esql
 ROW a=.9
 | EVAL acos=ACOS(a)
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-asin.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-asin.txt
@@ -1,7 +1,14 @@
 ASIN
 
-
-Inverse sine trigonometric function.
+Syntax
+Parameters
+n
+Numeric expression. If null, the function returns null.
+DescriptionReturns the
+arcsine
+of the input numeric expression as an angle, expressed in radians.Supported types
+Example
+```esql
 ROW a=.9
 | EVAL asin=ASIN(a)
-Supported types:
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-atan.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-atan.txt
@@ -1,7 +1,14 @@
 ATAN
 
-
-Inverse tangent trigonometric function.
+Syntax
+Parameters
+n
+Numeric expression. If null, the function returns null.
+DescriptionReturns the
+arctangent of the
+input numeric expression as an angle, expressed in radians.Supported types
+Example
+```esql
 ROW a=12.9
 | EVAL atan=ATAN(a)
-Supported types:
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-atan2.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-atan2.txt
@@ -1,8 +1,16 @@
 ATAN2
 
-
-The angle between the positive x-axis and the
-ray from the origin to the point (x , y) in the Cartesian plane.
+Syntax
+Parameters
+y
+Numeric expression. If null, the function returns null.
+x
+Numeric expression. If null, the function returns null.
+DescriptionThe angle between the positive x-axis and
+the ray from the origin to the point (x , y) in the Cartesian plane, expressed
+in radians.Supported types
+Example
+```esql
 ROW y=12.9, x=.6
 | EVAL atan2=ATAN2(y, x)
-Supported types:
+```
diff --git a/...k/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-auto_bucket.txt b/...k/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-auto_bucket.txt
@@ -1,27 +1,83 @@
 AUTO_BUCKET
 
-Creates human-friendly buckets and returns a datetime value for each row that
-corresponds to the resulting bucket the row falls into. Combine AUTO_BUCKET
-with STATS ... BY to create a date histogram.You provide a target number of buckets, a start date, and an end date, and it
-picks an appropriate bucket size to generate the target number of buckets or
-fewer. For example, this asks for at most 20 buckets over a whole year, which
-picks monthly buckets:
-ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
-| EVAL bucket=AUTO_BUCKET(date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+Syntax
+AUTO_BUCKET(field, buckets, from, to)
+Parameters
+field
+Numeric or date column from which to derive buckets.
+buckets
+Target number of buckets.
+from
+Start of the range. Can be a number or a date expressed as a string.
+to
+End of the range. Can be a number or a date expressed as a string.
+DescriptionCreates human-friendly buckets and returns a value for each row that corresponds
+to the resulting bucket the row falls into.Using a target number of buckets, a start of a range, and an end of a range,
+AUTO_BUCKET picks an appropriate bucket size to generate the target number of
+buckets or fewer. For example, asking for at most 20 buckets over a year results
+in monthly buckets:
+```esql
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| EVAL month = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| KEEP hire_date, month
+| SORT hire_date
+```
+
 The goal isn’t to provide exactly the target number of buckets, it’s to pick a
-range that people are comfortable with that provides at most the target number of
-buckets.If you ask for more buckets then AUTO_BUCKET can pick a smaller range. For example,
-asking for at most 100 buckets in a year will get you week long buckets:
-ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z")
-| EVAL bucket=AUTO_BUCKET(date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
-AUTO_BUCKET does not filter any rows. It only uses the provided time range to
-pick a good bucket size. For rows with a date outside of the range, it returns a
-datetime that corresponds to a bucket outside the range. Combine AUTO_BUCKET
-with WHERE to filter rows.A more complete example might look like:
+range that people are comfortable with that provides at most the target number
+of buckets.Combine AUTO_BUCKET with
+STATS ... BY to create a histogram:
+```esql
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| EVAL month = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| STATS hires_per_month = COUNT(*) BY month
+| SORT month
+```
+
+AUTO_BUCKET does not create buckets that don’t match any documents.
+That’s why this example is missing 1985-03-01 and other dates.
+Asking for more buckets can result in a smaller range. For example, asking for
+at most 100 buckets in a year results in weekly buckets:
+```esql
+FROM employees
+| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
+| EVAL week = AUTO_BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
+| STATS hires_per_week = COUNT(*) BY week
+| SORT week
+```
+
+AUTO_BUCKET does not filter any rows. It only uses the provided range to
+pick a good bucket size. For rows with a value outside of the range, it returns
+a bucket value that corresponds to a bucket outside the range. Combine
+AUTO_BUCKET with WHERE to filter rows.
+AUTO_BUCKET can also operate on numeric fields. For example, to create a
+salary histogram:
+```esql
+FROM employees
+| EVAL bs = AUTO_BUCKET(salary, 20, 25324, 74999)
+| STATS COUNT(*) by bs
+| SORT bs
+```
+
+Unlike the earlier example that intentionally filters on a date range, you
+rarely want to filter on a numeric range. You have to find the min and max
+separately. ES|QL doesn’t yet have an easy way to do that automatically.ExamplesCreate hourly buckets for the last 24 hours, and calculate the number of events
+per hour:
+```esql
+FROM sample_data
+| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()
+| EVAL bucket = AUTO_BUCKET(@timestamp, 25, DATE_FORMAT(NOW() - 1 day), DATE_FORMAT(NOW()))
+| STATS COUNT(*) BY bucket
+```
+
+Create monthly buckets for the year 1985, and calculate the average salary by
+hiring month:
+```esql
 FROM employees
 | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
 | EVAL bucket = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")
 | STATS AVG(salary) BY bucket
 | SORT bucket
-AUTO_BUCKET does not create buckets that don’t match any documents. That’s
-why the example above is missing 1985-03-01 and other dates.
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-avg.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-avg.txt
@@ -1,6 +1,11 @@
 AVG
 
-The average of a numeric field.
+Syntax
+AVG(column)
+column
+Numeric column. If null, the function returns null.
+DescriptionThe average of a numeric field.Supported typesThe result is always a double no matter the input type.Example
+```esql
 FROM employees
 | STATS AVG(height)
-The result is always a double not matter the input type.
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-case.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-case.txt
@@ -13,10 +13,32 @@ The default value that’s is returned when no condition matches.
 DescriptionAccepts pairs of conditions and values. The function returns the value that
 belongs to the first condition that evaluates to true.If the number of arguments is odd, the last argument is the default value which
 is returned when no condition matches. If the number of arguments is even, and
-no condition matches, the function returns null.Example
+no condition matches, the function returns null.ExampleDetermine whether employees are monolingual, bilingual, or polyglot:
+```esql
 FROM employees
 | EVAL type = CASE(
     languages <= 1, "monolingual",
     languages <= 2, "bilingual",
      "polyglot")
 | KEEP emp_no, languages, type
+```
+
+Calculate the total connection success rate based on log messages:
+```esql
+FROM sample_data
+| EVAL successful = CASE(
+    STARTS_WITH(message, "Connected to"), 1,
+    message == "Connection error", 0
+  )
+| STATS success_rate = AVG(successful)
+```
+
+Calculate an hourly error rate as a percentage of the total number of log
+messages:
+```esql
+FROM sample_data
+| EVAL error = CASE(message LIKE "*error*", 1, 0)
+| EVAL hour = DATE_TRUNC(1 hour, @timestamp)
+| STATS error_rate = AVG(error) by hour
+| SORT hour
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-ceil.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-ceil.txt
@@ -1,10 +1,16 @@
 CEIL
 
-
-Round a number up to the nearest integer.
+Syntax
+Parameters
+n
+Numeric expression. If null, the function returns null.
+DescriptionRound a number up to the nearest integer.
+This is a noop for long (including unsigned) and integer.
+      For double this picks the closest double value to the integer
+      similar to Math.ceil.
+Supported types
+Example
+```esql
 ROW a=1.8
 | EVAL a=CEIL(a)
-This is a noop for long (including unsigned) and integer.
-      For double this picks the the closest double value to the integer ala
-      Math.ceil.
-Supported types:
+```
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-coalesce.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/esql_docs/esql-coalesce.txt
@@ -1,5 +1,13 @@
 COALESCE
 
-Returns the first non-null value.
+Syntax
+COALESCE(expression1 [, ..., expressionN])
+Parameters
+expressionX
+Expression to evaluate.
+DescriptionReturns the first of its arguments that is not null. If all arguments are null,
+it returns null.Example
+```esql
 ROW a=null, b="b"
 | EVAL COALESCE(a, b)
+```