Skip to content

Commit

Permalink
[NL-to-ESQL] correctCommonEsqlMistakes: add timespan literals auto-…
Browse files Browse the repository at this point in the history
…correct (#202190)

## Summary

Part of #198942

Fixes a common grammar mistake where string literals are used instead of
timespan literals in the `DATE_TRUNC` and `BUCKET` functions.

This PR also paves the way for additional AST-based grammar corrections.


**Example**

*Input*
```esql
FROM logs
| EVAL trunc_year = DATE_TRUNC("1 year", date)
| EVAL trunc_month = DATE_TRUNC("month", date)
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "3 HOUR")
```
*Output*
```esql
FROM logs
| EVAL trunc_year = DATE_TRUNC(1 year, date)
| EVAL trunc_month = DATE_TRUNC(1 month, date)
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)
```
  • Loading branch information
pgayvallet authored Dec 2, 2024
1 parent 9b99070 commit 742854f
Show file tree
Hide file tree
Showing 22 changed files with 573 additions and 169 deletions.
8 changes: 1 addition & 7 deletions x-pack/plugins/inference/common/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,7 @@
* 2.0.
*/

export {
correctCommonEsqlMistakes,
splitIntoCommands,
} from './tasks/nl_to_esql/correct_common_esql_mistakes';

export { correctCommonEsqlMistakes, splitIntoCommands } from './tasks/nl_to_esql';
export { generateFakeToolCallId } from './utils/generate_fake_tool_call_id';

export { createOutputApi } from './output';

export type { ChatCompleteRequestBody, GetConnectorsResponseBody } from './http_apis';
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { ESQLTimeInterval } from '@kbn/esql-ast';

/**
 * Unit keywords recognised inside a timespan string, one row per unit of time
 * (singular form, plural form, then abbreviations). The rows are flattened
 * into a single list, keeping the order in which they are written here.
 */
const units = [
  ['millisecond', 'milliseconds', 'ms'],
  ['second', 'seconds', 'sec', 's'],
  ['minute', 'minutes', 'min'],
  ['hour', 'hours', 'h'],
  ['day', 'days', 'd'],
  ['week', 'weeks', 'w'],
  ['month', 'months', 'mo'],
  ['quarter', 'quarters', 'q'],
  ['year', 'years', 'yr', 'y'],
].flat();

// All unit keywords joined as a regexp alternation.
const unitAlternation = units.join('|');

/**
 * Matches a whole string made of an optional leading quote, an optional
 * integer quantity (capture group 1), optional whitespace, one unit keyword
 * (capture group 2) and an optional trailing quote. Matching is
 * case-insensitive.
 */
const timespanStringRegexp = new RegExp(`^["']?([0-9]+)?\\s*?(${unitAlternation})["']?$`, 'i');

/**
 * Builds a time interval AST node for the given unit and quantity.
 *
 * The node is synthetic: it does not come from parsed source text, so its
 * location is set to the `{ min: 0, max: 0 }` placeholder.
 *
 * @param unit - the unit keyword, e.g. `year`
 * @param quantity - how many units the interval spans, e.g. `1`
 * @returns a complete (`incomplete: false`) `timeInterval` node
 */
export function createTimespanLiteral(unit: string, quantity: number): ESQLTimeInterval {
  return {
    type: 'timeInterval',
    quantity,
    unit,
    // A timespan literal is written quantity first, then unit (`1 year`, not `year 1`).
    text: `${quantity}${unit}`,
    name: `${quantity} ${unit}`,
    incomplete: false,
    location: { min: 0, max: 0 },
  };
}

/**
 * Tells whether the given string matches the timespan string pattern
 * (`timespanStringRegexp`).
 *
 * @param str - the string to check
 * @returns `true` when the string matches the pattern, `false` otherwise
 */
export function isTimespanString(str: string): boolean {
  const matched = str.match(timespanStringRegexp);
  return matched !== null;
}

/**
 * Converts a timespan string into a time interval AST node.
 *
 * The unit is lowercased, and the quantity defaults to 1 when the string only
 * contains a unit.
 *
 * @param str - the string to convert
 * @returns the node produced by `createTimespanLiteral`
 * @throws Error when the string does not match the timespan string pattern
 */
export function stringToTimespanLiteral(str: string): ESQLTimeInterval {
  const parsed = timespanStringRegexp.exec(str);
  if (parsed === null) {
    throw new Error(`String "${str}" cannot be converted to timespan literal`);
  }

  // Index 0 is the whole match; the capture groups are the quantity and the unit.
  const [, rawQuantity, rawUnit] = parsed;
  const amount = rawQuantity ? parseInt(rawQuantity, 10) : 1;

  return createTimespanLiteral(rawUnit.toLowerCase(), amount);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { BasicPrettyPrinter, parse } from '@kbn/esql-ast';
import { correctAll, type QueryCorrection } from './corrections';

/**
 * Result returned by `correctQueryWithAst`.
 */
interface CorrectWithAstResult {
/** The resulting query text: either the re-printed query, or the input query returned as-is. */
output: string;
/** The corrections reported for the query; empty when the input query is returned as-is. */
corrections: QueryCorrection[];
}

/**
 * Parses the query, applies every AST-based correction to it, and prints the
 * corrected AST back to text.
 *
 * @param query - the query text to correct
 * @returns the resulting query text and the list of corrections applied. When
 *          the parser reports errors, the input is returned unchanged with an
 *          empty list of corrections.
 */
export const correctQueryWithAst = (query: string): CorrectWithAstResult => {
  const { root, errors } = parse(query);

  // Don't try modifying anything if the query is not syntactically correct.
  // `errors` must be checked by length: an empty array is still truthy, so
  // testing the array itself would skip the corrections for every query.
  if (errors.length > 0) {
    return {
      output: query,
      corrections: [],
    };
  }

  // Corrections mutate `root` in place and report what they changed.
  const corrections = correctAll(root);

  // Print on multiple lines only when the input itself spanned multiple lines.
  const multiline = /\r?\n/.test(query);
  const formattedQuery = BasicPrettyPrinter.print(root, { multiline, pipeTab: '' });

  return {
    output: formattedQuery,
    corrections,
  };
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { ESQLAstQueryExpression } from '@kbn/esql-ast';
import type { QueryCorrection } from './types';
import { applyTimespanLiteralsCorrections } from './timespan_literals';

export type { QueryCorrection } from './types';

/**
 * Runs every available AST correction against the query and returns the
 * corrections that were reported.
 *
 * Only the timespan literal correction exists for now.
 *
 * @param query - the query AST to correct
 * @returns a new array holding the reported corrections
 */
export const correctAll = (query: ESQLAstQueryExpression): QueryCorrection[] => {
  const timespanCorrections = applyTimespanLiteralsCorrections(query);
  return [...timespanCorrections];
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { parse, BasicPrettyPrinter } from '@kbn/esql-ast';
import { applyTimespanLiteralsCorrections } from './timespan_literals';

// Test suite for `applyTimespanLiteralsCorrections`.
// Each test parses a query, runs the correction on the resulting AST (which is
// modified in place), then either prints the AST back to text and compares it
// to the expected query, or inspects the list of corrections returned.
// NOTE(review): the suite label says `getTimespanLiteralsCorrections` while the
// function under test is `applyTimespanLiteralsCorrections` - confirm whether
// the label should be renamed.
describe('getTimespanLiteralsCorrections', () => {
// DATE_TRUNC: the timespan is the first argument.
describe('with DATE_TRUNC', () => {
it('replaces a timespan with a proper timespan literal', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

const output = BasicPrettyPrinter.print(root);

expect(output).toMatchInlineSnapshot(
`"FROM logs | EVAL truncated = DATE_TRUNC(1 year, date)"`
);
});

// A unit without a quantity is expected to become a quantity of 1.
it('replaces a timespan without quantity', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("month", date)';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

const output = BasicPrettyPrinter.print(root);

expect(output).toMatchInlineSnapshot(
`"FROM logs | EVAL truncated = DATE_TRUNC(1 month, date)"`
);
});

// The unit is expected to be lowercased in the output.
it('replaces uppercase literals', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 YEAR", date)';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

const output = BasicPrettyPrinter.print(root);

expect(output).toMatchInlineSnapshot(
`"FROM logs | EVAL truncated = DATE_TRUNC(1 year, date)"`
);
});

// Position 29 is the offset of `DATE_TRUNC` in the query string.
it('returns info about the correction', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
const { root } = parse(query);

const corrections = applyTimespanLiteralsCorrections(root);

expect(corrections).toHaveLength(1);
expect(corrections[0]).toEqual({
type: 'string_as_timespan_literal',
description:
'Replaced string literal with timespan literal in DATE_TRUNC function at position 29',
node: expect.any(Object),
});
});
});

// BUCKET: the timespan is the second argument.
describe('with BUCKET', () => {
it('replaces a timespan with a proper timespan literal', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 week")';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

const output = BasicPrettyPrinter.print(root);

expect(output).toMatchInlineSnapshot(
`"FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, 1 week)"`
);
});

// A unit without a quantity is expected to become a quantity of 1.
it('replaces a timespan without quantity', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

const output = BasicPrettyPrinter.print(root);

expect(output).toMatchInlineSnapshot(
`"FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 1 hour)"`
);
});

// The unit is expected to be lowercased in the output.
it('replaces uppercase literals', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 WEEK")';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

const output = BasicPrettyPrinter.print(root);

expect(output).toMatchInlineSnapshot(
`"FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, 1 week)"`
);
});

// Position 45 is the offset of `BUCKET` in the query string.
it('returns info about the correction', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
const { root } = parse(query);

const corrections = applyTimespanLiteralsCorrections(root);

expect(corrections).toHaveLength(1);
expect(corrections[0]).toEqual({
type: 'string_as_timespan_literal',
description:
'Replaced string literal with timespan literal in BUCKET function at position 45',
node: expect.any(Object),
});
});
});

// Several DATE_TRUNC and BUCKET calls in the same multi-line query.
describe('with mixed usages', () => {
it('find all occurrences in a complex query', () => {
const query = `FROM logs
| EVAL trunc_year = DATE_TRUNC("1 year", date)
| EVAL trunc_month = DATE_TRUNC("month", date)
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "3 hour")`;
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);

// Printed on multiple lines, without indenting the pipes, to match the input layout.
const output = BasicPrettyPrinter.print(root, { multiline: true, pipeTab: '' });

expect(output).toMatchInlineSnapshot(`
"FROM logs
| EVAL trunc_year = DATE_TRUNC(1 year, date)
| EVAL trunc_month = DATE_TRUNC(1 month, date)
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, 3 hour)"
`);
});
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { Walker, type ESQLAstQueryExpression } from '@kbn/esql-ast';
import { isDateTruncFunctionNode, isBucketFunctionNode, isStringLiteralNode } from '../typeguards';
import type { ESQLDateTruncFunction, ESQLBucketFunction } from '../types';
import { stringToTimespanLiteral, isTimespanString } from '../ast_tools/timespan';
import { QueryCorrection } from './types';

/**
* Correct timespan literal grammar mistakes, and returns the list of corrections that got applied.
*
* E.g.
* `DATE_TRUNC("YEAR", @timestamp)` => `DATE_TRUNC(1 year, @timestamp)`
* `BUCKET(@timestamp, "1 week")` => `BUCKET(@timestamp, 1 week)`
*
*/
export const applyTimespanLiteralsCorrections = (
  query: ESQLAstQueryExpression
): QueryCorrection[] => {
  const applied: QueryCorrection[] = [];

  // Visit every function node of the query; DATE_TRUNC and BUCKET nodes are
  // handed to their dedicated checker, which fixes the node in place.
  Walker.walk(query, {
    visitFunction: (fnNode) => {
      const found = [
        ...(isDateTruncFunctionNode(fnNode) ? checkDateTrunc(fnNode) : []),
        ...(isBucketFunctionNode(fnNode) ? checkBucket(fnNode) : []),
      ];
      applied.push(...found);
    },
  });

  return applied;
};

/**
 * Replaces the first argument of a two-argument DATE_TRUNC call with a
 * timespan literal when that argument is a string literal holding a timespan.
 *
 * The node is modified in place.
 *
 * @param node - the DATE_TRUNC function node to check
 * @returns a single-element list describing the correction, or an empty list
 *          when nothing was changed
 */
function checkDateTrunc(node: ESQLDateTruncFunction): QueryCorrection[] {
  if (node.args.length !== 2) {
    return [];
  }

  const intervalArg = node.args[0];
  if (!isStringLiteralNode(intervalArg) || !isTimespanString(intervalArg.value)) {
    return [];
  }

  node.args[0] = stringToTimespanLiteral(intervalArg.value);

  return [
    {
      type: 'string_as_timespan_literal',
      node,
      description: `Replaced string literal with timespan literal in DATE_TRUNC function at position ${node.location.min}`,
    },
  ];
}

/**
 * Replaces the second argument of a two-argument BUCKET call with a timespan
 * literal when that argument is a string literal holding a timespan.
 *
 * The node is modified in place.
 *
 * @param node - the BUCKET function node to check
 * @returns a single-element list describing the correction, or an empty list
 *          when nothing was changed
 */
function checkBucket(node: ESQLBucketFunction): QueryCorrection[] {
  // only checking the 2 args version - e.g. BUCKET(hire_date, 1 week)
  if (node.args.length !== 2) {
    return [];
  }

  const spanArg = node.args[1];
  if (!isStringLiteralNode(spanArg) || !isTimespanString(spanArg.value)) {
    return [];
  }

  node.args[1] = stringToTimespanLiteral(spanArg.value);

  return [
    {
      type: 'string_as_timespan_literal',
      node,
      description: `Replaced string literal with timespan literal in BUCKET function at position ${node.location.min}`,
    },
  ];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { ESQLSingleAstItem } from '@kbn/esql-ast';

/**
* Represents a correction that was applied to the query
*/
export interface QueryCorrection {
/** The type of correction, as a free-form identifier (e.g. `string_as_timespan_literal`) */
type: string;
/** A human-friendly-ish description of the correction, meant to be read rather than parsed */
description: string;
/** The parent node the correction was applied to (e.g. the function node whose argument got replaced) */
node: ESQLSingleAstItem;
}
Loading

0 comments on commit 742854f

Please sign in to comment.