Skip to content

Commit

Permalink
[8.x] [NL-to-ESQL] `correctCommonEsqlMistakes`: add timespa…
Browse files Browse the repository at this point in the history
…n literals auto-correct (#202190) (#202442)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[NL-to-ESQL] `correctCommonEsqlMistakes`: add timespan
literals auto-correct
(#202190)](#202190)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre
Gayvallet","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-12-02T11:37:02Z","message":"[NL-to-ESQL]
`correctCommonEsqlMistakes`: add timespan literals auto-correct
(#202190)\n\n## Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/198942\r\n\r\nFixes bad grammar
regarding using string literals instead of timespan\r\nliterals for
`DATE_TRUNC` and `BUCKET` functions.\r\n\r\nThis PR also paves the way
for additional AST-based grammar
corrections\r\n\r\n\r\n**Example**\r\n\r\n*Input*\r\n```esql\r\nFROM
logs\r\n| EVAL trunc_year = DATE_TRUNC(\"1 year\", date)\r\n| EVAL
trunc_month = DATE_TRUNC(\"month\", date)\r\n| STATS hires = COUNT(*) BY
hour = BUCKET(hire_date, \"3
HOUR\")\r\n```\r\n*Output*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year
= DATE_TRUNC(1 year, date)\r\n| EVAL trunc_month = DATE_TRUNC(1 month,
date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3
hour)\r\n```","sha":"742854f8bcca580a1c5c07fc516cf0b29fa0f0cb","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:version","Team:AI
Infra","v8.18.0"],"title":"[NL-to-ESQL] `correctCommonEsqlMistakes`: add
timespan literals
auto-correct","number":202190,"url":"https://github.com/elastic/kibana/pull/202190","mergeCommit":{"message":"[NL-to-ESQL]
`correctCommonEsqlMistakes`: add timespan literals auto-correct
(#202190)\n\n## Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/198942\r\n\r\nFixes bad grammar
regarding using string literals instead of timespan\r\nliterals for
`DATE_TRUNC` and `BUCKET` functions.\r\n\r\nThis PR also paves the way
for additional AST-based grammar
corrections\r\n\r\n\r\n**Example**\r\n\r\n*Input*\r\n```esql\r\nFROM
logs\r\n| EVAL trunc_year = DATE_TRUNC(\"1 year\", date)\r\n| EVAL
trunc_month = DATE_TRUNC(\"month\", date)\r\n| STATS hires = COUNT(*) BY
hour = BUCKET(hire_date, \"3
HOUR\")\r\n```\r\n*Output*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year
= DATE_TRUNC(1 year, date)\r\n| EVAL trunc_month = DATE_TRUNC(1 month,
date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3
hour)\r\n```","sha":"742854f8bcca580a1c5c07fc516cf0b29fa0f0cb"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/202190","number":202190,"mergeCommit":{"message":"[NL-to-ESQL]
`correctCommonEsqlMistakes`: add timespan literals auto-correct
(#202190)\n\n## Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/198942\r\n\r\nFixes bad grammar
regarding using string literals instead of timespan\r\nliterals for
`DATE_TRUNC` and `BUCKET` functions.\r\n\r\nThis PR also paves the way
for additional AST-based grammar
corrections\r\n\r\n\r\n**Example**\r\n\r\n*Input*\r\n```esql\r\nFROM
logs\r\n| EVAL trunc_year = DATE_TRUNC(\"1 year\", date)\r\n| EVAL
trunc_month = DATE_TRUNC(\"month\", date)\r\n| STATS hires = COUNT(*) BY
hour = BUCKET(hire_date, \"3
HOUR\")\r\n```\r\n*Output*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year
= DATE_TRUNC(1 year, date)\r\n| EVAL trunc_month = DATE_TRUNC(1 month,
date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3
hour)\r\n```","sha":"742854f8bcca580a1c5c07fc516cf0b29fa0f0cb"}},{"branch":"8.x","label":"v8.18.0","branchLabelMappingKey":"^v8.18.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Pierre Gayvallet <[email protected]>
  • Loading branch information
kibanamachine and pgayvallet authored Dec 2, 2024
1 parent 8b44bc2 commit 50c0d92
Show file tree
Hide file tree
Showing 22 changed files with 573 additions and 169 deletions.
8 changes: 1 addition & 7 deletions x-pack/plugins/inference/common/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,7 @@
* 2.0.
*/

export {
correctCommonEsqlMistakes,
splitIntoCommands,
} from './tasks/nl_to_esql/correct_common_esql_mistakes';

export { correctCommonEsqlMistakes, splitIntoCommands } from './tasks/nl_to_esql';
export { generateFakeToolCallId } from './utils/generate_fake_tool_call_id';

export { createOutputApi } from './output';

export type { ChatCompleteRequestBody, GetConnectorsResponseBody } from './http_apis';
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { ESQLTimeInterval } from '@kbn/esql-ast';

/**
 * Unit spellings accepted when recognizing a timespan written as a string.
 * Each group lists the singular, plural and abbreviated forms of one unit.
 */
const units = [
  // milliseconds
  'millisecond', 'milliseconds', 'ms',
  // seconds
  'second', 'seconds', 'sec', 's',
  // minutes
  'minute', 'minutes', 'min',
  // hours
  'hour', 'hours', 'h',
  // days
  'day', 'days', 'd',
  // weeks
  'week', 'weeks', 'w',
  // months
  'month', 'months', 'mo',
  // quarters
  'quarter', 'quarters', 'q',
  // years
  'year', 'years', 'yr', 'y',
];

/**
 * Matches a whole string made of an optional opening quote, an optional integer
 * quantity (capture group 1), optional whitespace, one of the known units
 * (capture group 2) and an optional closing quote. Case-insensitive.
 */
const timespanStringRegexp = new RegExp(
  '^["\']?([0-9]+)?\\s*?(' + units.join('|') + ')["\']?$',
  'i'
);

/**
 * Builds a timespan literal AST node (`type: 'timeInterval'`) from a unit and a quantity.
 *
 * The node is synthetic: it does not come from parsed source text, so its
 * `location` is set to `{ min: 0, max: 0 }` and `incomplete` to `false`.
 *
 * @param unit the time unit, e.g. `year`
 * @param quantity how many units, e.g. `1`
 * @returns the node, with `text` / `name` written quantity-first (`1year` / `1 year`)
 */
export function createTimespanLiteral(unit: string, quantity: number): ESQLTimeInterval {
  return {
    type: 'timeInterval',
    quantity,
    unit,
    // A timespan literal reads quantity-then-unit (`1 year`), not unit-then-quantity.
    text: `${quantity}${unit}`,
    name: `${quantity} ${unit}`,
    incomplete: false,
    location: { min: 0, max: 0 },
  };
}

/**
 * Tells whether the given string, as a whole, matches `timespanStringRegexp`,
 * i.e. whether it can be converted to a timespan literal.
 *
 * @param str the string to test
 * @returns `true` when the string matches, `false` otherwise
 */
export function isTimespanString(str: string): boolean {
  return str.match(timespanStringRegexp) !== null;
}

/**
 * Converts a string matching `timespanStringRegexp` into a timespan literal node.
 *
 * The unit is lowercased, and the quantity defaults to 1 when the string
 * contains no number.
 *
 * @param str the string to convert
 * @returns the node produced by `createTimespanLiteral`
 * @throws Error when the string does not match `timespanStringRegexp`
 */
export function stringToTimespanLiteral(str: string): ESQLTimeInterval {
  const parsed = timespanStringRegexp.exec(str);
  if (parsed === null) {
    throw new Error(`String "${str}" cannot be converted to timespan literal`);
  }

  // Index 0 is the full match; groups 1 and 2 are the quantity and the unit.
  const [, rawQuantity, rawUnit] = parsed;
  const normalizedUnit = rawUnit.toLowerCase();
  const quantity = rawQuantity ? parseInt(rawQuantity, 10) : 1;

  return createTimespanLiteral(normalizedUnit, quantity);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { BasicPrettyPrinter, parse } from '@kbn/esql-ast';
import { correctAll, type QueryCorrection } from './corrections';

/**
 * Result returned by `correctQueryWithAst`.
 */
interface CorrectWithAstResult {
/** The resulting query text */
output: string;
/** The corrections that were applied (empty when none were) */
corrections: QueryCorrection[];
}

/**
 * Parses the query, applies every AST-based correction to it, and prints it back.
 *
 * When the query has parse errors it is returned untouched with no corrections.
 * Otherwise the output is always re-printed from the AST, on multiple lines if
 * the input contained a line break.
 *
 * @param query the ES|QL query text
 * @returns the resulting query text and the corrections that were applied
 */
export const correctQueryWithAst = (query: string): CorrectWithAstResult => {
  const { root, errors } = parse(query);
  // don't try modifying anything if the query is not syntactically correct.
  // `errors` is an array, and an empty array is truthy, so test its length:
  // testing the array itself would bail out on every query.
  if (errors.length > 0) {
    return {
      output: query,
      corrections: [],
    };
  }

  // Corrections are applied in place on `root`.
  const corrections = correctAll(root);

  // Keep the one-line vs multi-line layout of the input.
  const multiline = /\r?\n/.test(query);
  const formattedQuery = BasicPrettyPrinter.print(root, { multiline, pipeTab: '' });

  return {
    output: formattedQuery,
    corrections,
  };
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { ESQLAstQueryExpression } from '@kbn/esql-ast';
import type { QueryCorrection } from './types';
import { applyTimespanLiteralsCorrections } from './timespan_literals';

export type { QueryCorrection } from './types';

/**
 * Runs every available AST correction against the query and returns the
 * corrections that were applied, in a new array.
 *
 * Timespan literal corrections are the only ones at the moment; they replace
 * arguments of the query's function nodes in place.
 *
 * @param query the root of the parsed query
 */
export const correctAll = (query: ESQLAstQueryExpression): QueryCorrection[] => {
  const timespanCorrections = applyTimespanLiteralsCorrections(query);
  return [...timespanCorrections];
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { parse, BasicPrettyPrinter } from '@kbn/esql-ast';
import { applyTimespanLiteralsCorrections } from './timespan_literals';

// Tests for `applyTimespanLiteralsCorrections`. Each case parses a query, applies the
// corrections in place on the AST, then checks either the re-printed query or the
// list of corrections that was returned.
describe('applyTimespanLiteralsCorrections', () => {
  describe('with DATE_TRUNC', () => {
    it('replaces a timespan with a proper timespan literal', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | EVAL truncated = DATE_TRUNC(1 year, date)"`
      );
    });

    it('replaces a timespan without quantity', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("month", date)';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | EVAL truncated = DATE_TRUNC(1 month, date)"`
      );
    });

    it('replaces uppercase literals', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 YEAR", date)';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | EVAL truncated = DATE_TRUNC(1 year, date)"`
      );
    });

    it('returns info about the correction', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
      const { root } = parse(query);

      const corrections = applyTimespanLiteralsCorrections(root);

      expect(corrections).toHaveLength(1);
      // Position 29 is the offset of `DATE_TRUNC` in the query string.
      expect(corrections[0]).toEqual({
        type: 'string_as_timespan_literal',
        description:
          'Replaced string literal with timespan literal in DATE_TRUNC function at position 29',
        node: expect.any(Object),
      });
    });
  });

  describe('with BUCKET', () => {
    it('replaces a timespan with a proper timespan literal', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 week")';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, 1 week)"`
      );
    });

    it('replaces a timespan without quantity', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 1 hour)"`
      );
    });

    it('replaces uppercase literals', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 WEEK")';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, 1 week)"`
      );
    });

    it('returns info about the correction', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
      const { root } = parse(query);

      const corrections = applyTimespanLiteralsCorrections(root);

      expect(corrections).toHaveLength(1);
      // Position 45 is the offset of `BUCKET` in the query string.
      expect(corrections[0]).toEqual({
        type: 'string_as_timespan_literal',
        description:
          'Replaced string literal with timespan literal in BUCKET function at position 45',
        node: expect.any(Object),
      });
    });
  });

  describe('with mixed usages', () => {
    it('find all occurrences in a complex query', () => {
      const query = `FROM logs
| EVAL trunc_year = DATE_TRUNC("1 year", date)
| EVAL trunc_month = DATE_TRUNC("month", date)
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "3 hour")`;
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root, { multiline: true, pipeTab: '' });

      expect(output).toMatchInlineSnapshot(`
"FROM logs
| EVAL trunc_year = DATE_TRUNC(1 year, date)
| EVAL trunc_month = DATE_TRUNC(1 month, date)
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)"
`);
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { Walker, type ESQLAstQueryExpression } from '@kbn/esql-ast';
import { isDateTruncFunctionNode, isBucketFunctionNode, isStringLiteralNode } from '../typeguards';
import type { ESQLDateTruncFunction, ESQLBucketFunction } from '../types';
import { stringToTimespanLiteral, isTimespanString } from '../ast_tools/timespan';
import { QueryCorrection } from './types';

/**
 * Corrects timespan literal grammar mistakes in place, and returns the list of
 * corrections that were applied.
 *
 * Every function node of the query is visited; `DATE_TRUNC` and `BUCKET` nodes
 * are checked for a string used where a timespan literal is expected.
 *
 * E.g.
 * `DATE_TRUNC("YEAR", @timestamp)` => `DATE_TRUNC(1 year, @timestamp)`
 * `BUCKET(@timestamp, "1 week")` => `BUCKET(@timestamp, 1 week)`
 */
export const applyTimespanLiteralsCorrections = (
  query: ESQLAstQueryExpression
): QueryCorrection[] => {
  const applied: QueryCorrection[] = [];

  Walker.walk(query, {
    visitFunction: (fnNode) => {
      if (isDateTruncFunctionNode(fnNode)) {
        for (const correction of checkDateTrunc(fnNode)) {
          applied.push(correction);
        }
      }
      if (isBucketFunctionNode(fnNode)) {
        for (const correction of checkBucket(fnNode)) {
          applied.push(correction);
        }
      }
    },
  });

  return applied;
};

/**
 * Replaces, in place, the first argument of a two-argument `DATE_TRUNC` call
 * when it is a string literal holding a timespan (e.g. `"1 year"`).
 *
 * @param node the `DATE_TRUNC` function node to check
 * @returns a single-element list describing the correction, or an empty list
 * when nothing was changed
 */
function checkDateTrunc(node: ESQLDateTruncFunction): QueryCorrection[] {
  if (node.args.length !== 2) {
    return [];
  }

  const intervalArg = node.args[0];
  if (!isStringLiteralNode(intervalArg) || !isTimespanString(intervalArg.value)) {
    return [];
  }

  node.args[0] = stringToTimespanLiteral(intervalArg.value);

  return [
    {
      type: 'string_as_timespan_literal',
      node,
      description: `Replaced string literal with timespan literal in DATE_TRUNC function at position ${node.location.min}`,
    },
  ];
}

/**
 * Replaces, in place, the second argument of a two-argument `BUCKET` call
 * when it is a string literal holding a timespan (e.g. `"1 week"`).
 *
 * @param node the `BUCKET` function node to check
 * @returns a single-element list describing the correction, or an empty list
 * when nothing was changed
 */
function checkBucket(node: ESQLBucketFunction): QueryCorrection[] {
  // only checking the 2 args version - e.g. BUCKET(hire_date, 1 week)
  if (node.args.length !== 2) {
    return [];
  }

  const spanArg = node.args[1];
  if (!isStringLiteralNode(spanArg) || !isTimespanString(spanArg.value)) {
    return [];
  }

  node.args[1] = stringToTimespanLiteral(spanArg.value);

  return [
    {
      type: 'string_as_timespan_literal',
      node,
      description: `Replaced string literal with timespan literal in BUCKET function at position ${node.location.min}`,
    },
  ];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { ESQLSingleAstItem } from '@kbn/esql-ast';

/**
* Represents a correction that was applied to the query.
*/
export interface QueryCorrection {
/** The type of correction, e.g. `string_as_timespan_literal` */
type: string;
/** A human-readable description of the correction */
description: string;
/** The parent node the correction was applied to, e.g. the function node whose argument was replaced */
node: ESQLSingleAstItem;
}
Loading

0 comments on commit 50c0d92

Please sign in to comment.