-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[8.x] [NL-to-ESQL] `correctCommonEsqlMistakes`: add timespa…
…n literals auto-correct (#202190) (#202442) # Backport This will backport the following commits from `main` to `8.x`: - [[NL-to-ESQL] `correctCommonEsqlMistakes`: add timespan literals auto-correct (#202190)](#202190) <!--- Backport version: 9.4.3 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) <!--BACKPORT [{"author":{"name":"Pierre Gayvallet","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-12-02T11:37:02Z","message":"[NL-to-ESQL] `correctCommonEsqlMistakes`: add timespan literals auto-correct (#202190)\n\n## Summary\r\n\r\nPart of https://github.com/elastic/kibana/issues/198942\r\n\r\nFixes bad grammar regarding using string literals instead of timespan\r\nliterals for `DATE_TRUNC` and `BUCKET` functions.\r\n\r\nThis PR also paves the way for additional AST-based grammar corrections\r\n\r\n\r\n**Example**\r\n\r\n*Input*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year = DATE_TRUNC(\"1 year\", date)\r\n| EVAL trunc_month = DATE_TRUNC(\"month\", date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, \"3 HOUR\")\r\n```\r\n*Output*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year = DATE_TRUNC(1 year, date)\r\n| EVAL trunc_month = DATE_TRUNC(1 month, date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)\r\n```","sha":"742854f8bcca580a1c5c07fc516cf0b29fa0f0cb","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:version","Team:AI Infra","v8.18.0"],"title":"[NL-to-ESQL] `correctCommonEsqlMistakes`: add timespan literals auto-correct","number":202190,"url":"https://github.com/elastic/kibana/pull/202190","mergeCommit":{"message":"[NL-to-ESQL] `correctCommonEsqlMistakes`: add timespan literals auto-correct (#202190)\n\n## Summary\r\n\r\nPart of https://github.com/elastic/kibana/issues/198942\r\n\r\nFixes bad grammar regarding using string literals instead of 
timespan\r\nliterals for `DATE_TRUNC` and `BUCKET` functions.\r\n\r\nThis PR also paves the way for additional AST-based grammar corrections\r\n\r\n\r\n**Example**\r\n\r\n*Input*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year = DATE_TRUNC(\"1 year\", date)\r\n| EVAL trunc_month = DATE_TRUNC(\"month\", date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, \"3 HOUR\")\r\n```\r\n*Output*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year = DATE_TRUNC(1 year, date)\r\n| EVAL trunc_month = DATE_TRUNC(1 month, date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)\r\n```","sha":"742854f8bcca580a1c5c07fc516cf0b29fa0f0cb"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/202190","number":202190,"mergeCommit":{"message":"[NL-to-ESQL] `correctCommonEsqlMistakes`: add timespan literals auto-correct (#202190)\n\n## Summary\r\n\r\nPart of https://github.com/elastic/kibana/issues/198942\r\n\r\nFixes bad grammar regarding using string literals instead of timespan\r\nliterals for `DATE_TRUNC` and `BUCKET` functions.\r\n\r\nThis PR also paves the way for additional AST-based grammar corrections\r\n\r\n\r\n**Example**\r\n\r\n*Input*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year = DATE_TRUNC(\"1 year\", date)\r\n| EVAL trunc_month = DATE_TRUNC(\"month\", date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, \"3 HOUR\")\r\n```\r\n*Output*\r\n```esql\r\nFROM logs\r\n| EVAL trunc_year = DATE_TRUNC(1 year, date)\r\n| EVAL trunc_month = DATE_TRUNC(1 month, date)\r\n| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)\r\n```","sha":"742854f8bcca580a1c5c07fc516cf0b29fa0f0cb"}},{"branch":"8.x","label":"v8.18.0","branchLabelMappingKey":"^v8.18.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}] BACKPORT--> Co-authored-by: Pierre Gayvallet <[email protected]>
- Loading branch information
1 parent
8b44bc2
commit 50c0d92
Showing
22 changed files
with
573 additions
and
169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
x-pack/plugins/inference/common/tasks/nl_to_esql/ast/ast_tools/timespan.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { ESQLTimeInterval } from '@kbn/esql-ast'; | ||
|
||
// Every spelling accepted for a timespan unit: one row per unit, listing the
// singular form, the plural form and the supported abbreviations.
const units = [
  ['millisecond', 'milliseconds', 'ms'],
  ['second', 'seconds', 'sec', 's'],
  ['minute', 'minutes', 'min'],
  ['hour', 'hours', 'h'],
  ['day', 'days', 'd'],
  ['week', 'weeks', 'w'],
  ['month', 'months', 'mo'],
  ['quarter', 'quarters', 'q'],
  ['year', 'years', 'yr', 'y'],
].flat();

// Matches a whole string made of an optional opening quote, an optional integer
// quantity (capture group 1), optional whitespace, one of the known units
// (capture group 2) and an optional closing quote. Matching is case-insensitive.
const unitAlternation = units.join('|');
const timespanStringRegexp = new RegExp(
  `^["']?([0-9]+)?\\s*?(${unitAlternation})["']?$`,
  'i'
);
|
||
/**
 * Builds a synthetic timespan literal AST node (e.g. `1 year`).
 *
 * @param unit the time unit, e.g. `year` or `h`
 * @param quantity how many units the timespan covers
 * @returns a `timeInterval` node. Its `location` is zeroed because the node is
 * created programmatically and has no position in the original query text.
 */
export function createTimespanLiteral(unit: string, quantity: number): ESQLTimeInterval {
  return {
    type: 'timeInterval',
    quantity,
    unit,
    // A timespan literal is written quantity first, then unit (`1 year`):
    // `text` is the compact form, `name` the space-separated one.
    text: `${quantity}${unit}`,
    name: `${quantity} ${unit}`,
    incomplete: false,
    location: { min: 0, max: 0 },
  };
}
|
||
/**
 * Tells whether the given string (optionally wrapped in quotes) has the shape
 * of a timespan, i.e. an optional integer quantity followed by a known unit.
 */
export function isTimespanString(str: string): boolean {
  // The regexp has no global/sticky flag, so `test` carries no state between calls.
  return timespanStringRegexp.test(str);
}
|
||
/**
 * Converts a timespan-looking string (e.g. `"1 year"` or `"MONTH"`) into a
 * timespan literal node.
 *
 * The unit is lower-cased, and a missing quantity defaults to 1.
 *
 * @throws Error when the string does not match the timespan pattern
 */
export function stringToTimespanLiteral(str: string): ESQLTimeInterval {
  const parsed = timespanStringRegexp.exec(str);
  if (parsed === null) {
    throw new Error(`String "${str}" cannot be converted to timespan literal`);
  }

  // Group 1 is the optional quantity, group 2 the unit.
  const [, rawQuantity, rawUnit] = parsed;
  const quantity = rawQuantity ? parseInt(rawQuantity, 10) : 1;

  return createTimespanLiteral(rawUnit.toLowerCase(), quantity);
}
35 changes: 35 additions & 0 deletions
35
x-pack/plugins/inference/common/tasks/nl_to_esql/ast/correct_with_ast.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { BasicPrettyPrinter, parse } from '@kbn/esql-ast'; | ||
import { correctAll, type QueryCorrection } from './corrections'; | ||
|
||
/** Outcome of an AST-based correction pass over a query. */
interface CorrectWithAstResult {
  /** The resulting query text; this is the untouched input when the query has syntax errors. */
  output: string;
  /** The corrections that were applied to the query. */
  corrections: QueryCorrection[];
}

/**
 * Parses the query, applies all AST-based corrections, and prints the result.
 *
 * When the query has syntax errors it is returned as-is with no corrections.
 * Otherwise the corrected AST is pretty-printed, on multiple lines if the
 * input itself contained a line break.
 *
 * @param query the query text to correct
 * @returns the resulting query text and the list of applied corrections
 */
export const correctQueryWithAst = (query: string): CorrectWithAstResult => {
  const { root, errors } = parse(query);
  // don't try modifying anything if the query is not syntactically correct.
  // `errors` is an array, so its length must be checked: an empty array is truthy.
  if (errors.length > 0) {
    return {
      output: query,
      corrections: [],
    };
  }

  const corrections = correctAll(root);

  // Keep the original layout: only print on multiple lines if the input was multiline.
  const multiline = /\r?\n/.test(query);
  const formattedQuery = BasicPrettyPrinter.print(root, { multiline, pipeTab: '' });

  return {
    output: formattedQuery,
    corrections,
  };
};
18 changes: 18 additions & 0 deletions
18
x-pack/plugins/inference/common/tasks/nl_to_esql/ast/corrections/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { ESQLAstQueryExpression } from '@kbn/esql-ast'; | ||
import type { QueryCorrection } from './types'; | ||
import { applyTimespanLiteralsCorrections } from './timespan_literals'; | ||
|
||
export type { QueryCorrection } from './types'; | ||
|
||
/**
 * Runs every available correction pass against the query AST and returns the
 * corrections they report, in pass order.
 */
export const correctAll = (query: ESQLAstQueryExpression): QueryCorrection[] => {
  const passes = [applyTimespanLiteralsCorrections];
  return passes.flatMap((applyPass) => applyPass(query));
};
144 changes: 144 additions & 0 deletions
144
x-pack/plugins/inference/common/tasks/nl_to_esql/ast/corrections/timespan_literals.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { parse, BasicPrettyPrinter } from '@kbn/esql-ast'; | ||
import { applyTimespanLiteralsCorrections } from './timespan_literals'; | ||
|
||
// Each test parses a query, applies the corrections in place on the AST, then
// either checks the pretty-printed output or the returned correction info.
describe('applyTimespanLiteralsCorrections', () => {
  describe('with DATE_TRUNC', () => {
    it('replaces a timespan with a proper timespan literal', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | EVAL truncated = DATE_TRUNC(1 year, date)"`
      );
    });

    it('replaces a timespan without quantity', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("month", date)';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | EVAL truncated = DATE_TRUNC(1 month, date)"`
      );
    });

    it('replaces uppercase literals', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 YEAR", date)';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | EVAL truncated = DATE_TRUNC(1 year, date)"`
      );
    });

    it('returns info about the correction', () => {
      const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
      const { root } = parse(query);

      const corrections = applyTimespanLiteralsCorrections(root);

      // the reported position is the start offset of the function call in the query
      expect(corrections).toHaveLength(1);
      expect(corrections[0]).toEqual({
        type: 'string_as_timespan_literal',
        description:
          'Replaced string literal with timespan literal in DATE_TRUNC function at position 29',
        node: expect.any(Object),
      });
    });
  });

  describe('with BUCKET', () => {
    it('replaces a timespan with a proper timespan literal', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 week")';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, 1 week)"`
      );
    });

    it('replaces a timespan without quantity', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 1 hour)"`
      );
    });

    it('replaces uppercase literals', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 WEEK")';
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root);

      expect(output).toMatchInlineSnapshot(
        `"FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, 1 week)"`
      );
    });

    it('returns info about the correction', () => {
      const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
      const { root } = parse(query);

      const corrections = applyTimespanLiteralsCorrections(root);

      // the reported position is the start offset of the function call in the query
      expect(corrections).toHaveLength(1);
      expect(corrections[0]).toEqual({
        type: 'string_as_timespan_literal',
        description:
          'Replaced string literal with timespan literal in BUCKET function at position 45',
        node: expect.any(Object),
      });
    });
  });

  describe('with mixed usages', () => {
    it('find all occurrences in a complex query', () => {
      const query = `FROM logs
      | EVAL trunc_year = DATE_TRUNC("1 year", date)
      | EVAL trunc_month = DATE_TRUNC("month", date)
      | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "3 hour")`;
      const { root } = parse(query);

      applyTimespanLiteralsCorrections(root);

      const output = BasicPrettyPrinter.print(root, { multiline: true, pipeTab: '' });

      expect(output).toMatchInlineSnapshot(`
        "FROM logs
        | EVAL trunc_year = DATE_TRUNC(1 year, date)
        | EVAL trunc_month = DATE_TRUNC(1 month, date)
        | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)"
      `);
    });
  });
});
84 changes: 84 additions & 0 deletions
84
x-pack/plugins/inference/common/tasks/nl_to_esql/ast/corrections/timespan_literals.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { Walker, type ESQLAstQueryExpression } from '@kbn/esql-ast'; | ||
import { isDateTruncFunctionNode, isBucketFunctionNode, isStringLiteralNode } from '../typeguards'; | ||
import type { ESQLDateTruncFunction, ESQLBucketFunction } from '../types'; | ||
import { stringToTimespanLiteral, isTimespanString } from '../ast_tools/timespan'; | ||
import { QueryCorrection } from './types'; | ||
|
||
/**
 * Fixes string literals used where a timespan literal is expected, mutating
 * the given AST in place, and reports every correction that was applied.
 *
 * E.g.
 * `DATE_TRUNC("YEAR", @timestamp)` => `DATE_TRUNC(1 year, @timestamp)`
 * `BUCKET(@timestamp, "1 week")` => `BUCKET(@timestamp, 1 week)`
 *
 * @param query the root of the query AST to inspect and correct
 * @returns the list of corrections applied, in visit order
 */
export const applyTimespanLiteralsCorrections = (
  query: ESQLAstQueryExpression
): QueryCorrection[] => {
  const applied: QueryCorrection[] = [];

  Walker.walk(query, {
    visitFunction: (fnNode) => {
      // Each check only runs for its matching function kind, DATE_TRUNC first.
      const found = [
        ...(isDateTruncFunctionNode(fnNode) ? checkDateTrunc(fnNode) : []),
        ...(isBucketFunctionNode(fnNode) ? checkBucket(fnNode) : []),
      ];
      applied.push(...found);
    },
  });

  return applied;
};
|
||
/**
 * Replaces the first argument of a two-argument DATE_TRUNC call with a
 * timespan literal when it is a string literal that looks like a timespan.
 *
 * @returns a single-element list describing the correction, or an empty list
 * when nothing was changed
 */
function checkDateTrunc(node: ESQLDateTruncFunction): QueryCorrection[] {
  if (node.args.length !== 2) {
    return [];
  }

  const candidate = node.args[0];
  if (!isStringLiteralNode(candidate) || !isTimespanString(candidate.value)) {
    return [];
  }

  // swap the string literal for the equivalent timespan literal, in place
  node.args[0] = stringToTimespanLiteral(candidate.value);

  return [
    {
      type: 'string_as_timespan_literal',
      node,
      description: `Replaced string literal with timespan literal in DATE_TRUNC function at position ${node.location.min}`,
    },
  ];
}
|
||
/**
 * Replaces the second argument of a two-argument BUCKET call with a timespan
 * literal when it is a string literal that looks like a timespan.
 *
 * @returns a single-element list describing the correction, or an empty list
 * when nothing was changed
 */
function checkBucket(node: ESQLBucketFunction): QueryCorrection[] {
  // only the 2 args version is handled - e.g. BUCKET(hire_date, 1 week)
  if (node.args.length !== 2) {
    return [];
  }

  const candidate = node.args[1];
  if (!isStringLiteralNode(candidate) || !isTimespanString(candidate.value)) {
    return [];
  }

  // swap the string literal for the equivalent timespan literal, in place
  node.args[1] = stringToTimespanLiteral(candidate.value);

  return [
    {
      type: 'string_as_timespan_literal',
      node,
      description: `Replaced string literal with timespan literal in BUCKET function at position ${node.location.min}`,
    },
  ];
}
20 changes: 20 additions & 0 deletions
20
x-pack/plugins/inference/common/tasks/nl_to_esql/ast/corrections/types.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { ESQLSingleAstItem } from '@kbn/esql-ast'; | ||
|
||
/**
 * Describes one correction that was applied to a query.
 */
export interface QueryCorrection {
  /** Identifier of the kind of correction, e.g. `string_as_timespan_literal` */
  type: string;
  /** Human-readable summary of what was changed */
  description: string;
  /** The parent AST node in which the correction was made */
  node: ESQLSingleAstItem;
}
Oops, something went wrong.