From df37092b20b49b536fe541cc1dfcd294922ee7e7 Mon Sep 17 00:00:00 2001 From: Vadim Kibana <82822460+vadimkibana@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:19:59 +0100 Subject: [PATCH] [ES|QL] `JOIN` command parser support (#202749) ## Summary Partially addresses https://github.com/elastic/kibana/issues/200858 - Adds `JOIN` command support in Kibana ES|QL AST and parser. - Adds `commandType` to AST nodes, to support `<type> JOIN ...` in join commands. - Adds `AS` binary expression, to support *target* aliasing in `JOIN` commands: `LEFT JOIN a AS b` ### Checklist - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios --- .../src/parser/__tests__/join.test.ts | 202 ++++++++++++++++++ .../src/parser/esql_ast_builder_listener.ts | 19 ++ .../kbn-esql-ast/src/parser/factories/join.ts | 57 +++++ packages/kbn-esql-ast/src/parser/walkers.ts | 2 +- packages/kbn-esql-ast/src/query/query.ts | 14 +- packages/kbn-esql-ast/src/types.ts | 7 + .../src/validation/validation.ts | 6 +- 7 files changed, 301 insertions(+), 6 deletions(-) create mode 100644 packages/kbn-esql-ast/src/parser/__tests__/join.test.ts create mode 100644 packages/kbn-esql-ast/src/parser/factories/join.ts diff --git a/packages/kbn-esql-ast/src/parser/__tests__/join.test.ts b/packages/kbn-esql-ast/src/parser/__tests__/join.test.ts new file mode 100644 index 0000000000000..5784e0c71cb86 --- /dev/null +++ b/packages/kbn-esql-ast/src/parser/__tests__/join.test.ts @@ -0,0 +1,202 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import { EsqlQuery } from '../../query'; +import { Walker } from '../../walker'; + +describe(' JOIN command', () => { + describe('correctly formatted', () => { + it('can parse out JOIN command', () => { + const text = `FROM employees | LOOKUP JOIN languages_lookup ON language_code`; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[1]).toMatchObject({ + type: 'command', + name: 'join', + commandType: 'lookup', + }); + }); + + it('supports all join types', () => { + const assertJoinType = (type: string) => { + const text = `FROM employees | ${type} JOIN languages_lookup ON language_code`; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[1]).toMatchObject({ + type: 'command', + name: 'join', + commandType: type.toLowerCase(), + }); + }; + + assertJoinType('LOOKUP'); + assertJoinType('LEFT'); + assertJoinType('RIGHT'); + expect(() => assertJoinType('HASH')).toThrow(); + }); + + it('can parse out target identifier', () => { + const text = `FROM employees | LOOKUP JOIN languages_lookup ON language_code`; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[1]).toMatchObject({ + commandType: 'lookup', + args: [ + { + type: 'identifier', + name: 'languages_lookup', + }, + {}, + ], + }); + }); + + it('can parse out target with "AS" alias expression', () => { + const text = `FROM employees | LOOKUP JOIN languages_lookup AS ll ON language_code`; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[1]).toMatchObject({ + commandType: 'lookup', + args: [ + { + type: 'function', + subtype: 'binary-expression', + name: 'as', + args: [ + { + type: 
'identifier', + name: 'languages_lookup', + }, + { + type: 'identifier', + name: 'll', + }, + ], + }, + {}, + ], + }); + }); + + it('can parse out a single "ON" predicate expression', () => { + const text = `FROM employees | LOOKUP JOIN languages_lookup AS ll ON language_code`; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[1]).toMatchObject({ + commandType: 'lookup', + args: [ + {}, + { + type: 'option', + name: 'on', + args: [ + { + type: 'column', + name: 'language_code', + args: [ + { + type: 'identifier', + name: 'language_code', + }, + ], + }, + ], + }, + ], + }); + }); + + it('can parse out multiple "ON" predicate expressions', () => { + const text = `FROM employees | LOOKUP JOIN languages_lookup AS ll ON a, b, c`; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[1]).toMatchObject({ + name: 'join', + args: [ + {}, + { + type: 'option', + name: 'on', + args: [ + { + type: 'column', + name: 'a', + }, + { + type: 'column', + name: 'b', + }, + { + type: 'column', + name: 'c', + }, + ], + }, + ], + }); + }); + + it('example from documentation', () => { + const text = ` + FROM employees + | EVAL language_code = languages + | LOOKUP JOIN languages_lookup ON language_code + | WHERE emp_no < 500 + | KEEP emp_no, language_name + | SORT emp_no + | LIMIT 10 + `; + const query = EsqlQuery.fromSrc(text); + + expect(query.ast.commands[2]).toMatchObject({ + type: 'command', + name: 'join', + commandType: 'lookup', + args: [ + { + type: 'identifier', + name: 'languages_lookup', + }, + { + type: 'option', + name: 'on', + args: [ + { + type: 'column', + name: 'language_code', + }, + ], + }, + ], + }); + }); + + it('correctly extracts node positions', () => { + const text = `FROM employees | LOOKUP JOIN index AS alias ON on_1, on_2 | LIMIT 1`; + const query = EsqlQuery.fromSrc(text); + const node1 = Walker.match(query.ast, { type: 'identifier', name: 'index' }); + const node2 = Walker.match(query.ast, { type: 'identifier', name: 
'alias' }); + const node3 = Walker.match(query.ast, { type: 'column', name: 'on_1' }); + const node4 = Walker.match(query.ast, { type: 'column', name: 'on_2' }); + + expect(query.src.slice(node1?.location.min, node1?.location.max! + 1)).toBe('index'); + expect(query.src.slice(node2?.location.min, node2?.location.max! + 1)).toBe('alias'); + expect(query.src.slice(node3?.location.min, node3?.location.max! + 1)).toBe('on_1'); + expect(query.src.slice(node4?.location.min, node4?.location.max! + 1)).toBe('on_2'); + }); + }); + + describe('incorrectly formatted', () => { + const text = `FROM employees | LOOKUP JOIN index AAS alias ON on_1, on_2 | LIMIT 1`; + const query = EsqlQuery.fromSrc(text); + + expect(query.errors.length > 0).toBe(true); + expect(query.errors[0].message.includes('AAS')).toBe(true); + }); +}); diff --git a/packages/kbn-esql-ast/src/parser/esql_ast_builder_listener.ts b/packages/kbn-esql-ast/src/parser/esql_ast_builder_listener.ts index a3f5bfabed154..e167a55f1b682 100644 --- a/packages/kbn-esql-ast/src/parser/esql_ast_builder_listener.ts +++ b/packages/kbn-esql-ast/src/parser/esql_ast_builder_listener.ts @@ -30,6 +30,7 @@ import { type MetricsCommandContext, IndexPatternContext, InlinestatsCommandContext, + JoinCommandContext, } from '../antlr/esql_parser'; import { default as ESQLParserListener } from '../antlr/esql_parser_listener'; import { @@ -58,6 +59,7 @@ import { getEnrichClauses, } from './walkers'; import type { ESQLAst, ESQLAstMetricsCommand } from '../types'; +import { createJoinCommand } from './factories/join'; export class ESQLAstBuilderListener implements ESQLParserListener { private ast: ESQLAst = []; @@ -304,6 +306,23 @@ export class ESQLAstBuilderListener implements ESQLParserListener { command.args.push(...getPolicyName(ctx), ...getMatchField(ctx), ...getEnrichClauses(ctx)); } + /** + * Exit a parse tree produced by `esql_parser.joinCommand`. 
+ * + * Parse the JOIN command: + * + * ``` + * JOIN identifier [ AS identifier ] ON expression [, expression [, ... ]] + * ``` + * + * @param ctx the parse tree + */ + exitJoinCommand(ctx: JoinCommandContext): void { + const command = createJoinCommand(ctx); + + this.ast.push(command); + } + enterEveryRule(ctx: ParserRuleContext): void { // method not implemented, added to satisfy interface expectation } diff --git a/packages/kbn-esql-ast/src/parser/factories/join.ts b/packages/kbn-esql-ast/src/parser/factories/join.ts new file mode 100644 index 0000000000000..400313aa39045 --- /dev/null +++ b/packages/kbn-esql-ast/src/parser/factories/join.ts @@ -0,0 +1,57 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +import { JoinCommandContext, JoinTargetContext } from '../../antlr/esql_parser'; +import { Builder } from '../../builder'; +import { ESQLAstItem, ESQLBinaryExpression, ESQLCommand, ESQLIdentifier } from '../../types'; +import { createCommand, createIdentifier } from '../factories'; +import { visitValueExpression } from '../walkers'; + +const createNodeFromJoinTarget = ( + ctx: JoinTargetContext +): ESQLIdentifier | ESQLBinaryExpression => { + const index = createIdentifier(ctx._index); + const aliasCtx = ctx._alias; + + if (!aliasCtx) { + return index; + } + + const alias = createIdentifier(aliasCtx); + const renameExpression = Builder.expression.func.binary('as', [ + index, + alias, + ]) as ESQLBinaryExpression; + + return renameExpression; +}; + +export const createJoinCommand = (ctx: JoinCommandContext): ESQLCommand => { + const command = createCommand('join', ctx); + + // Pick-up the of the command. + command.commandType = (ctx._type_.text ?? '').toLocaleLowerCase(); + + const joinTarget = createNodeFromJoinTarget(ctx.joinTarget()); + const onOption = Builder.option({ name: 'on' }); + const joinPredicates: ESQLAstItem[] = onOption.args; + + for (const joinPredicateCtx of ctx.joinCondition().joinPredicate_list()) { + const expression = visitValueExpression(joinPredicateCtx.valueExpression()); + + if (expression) { + joinPredicates.push(expression); + } + } + + command.args.push(joinTarget); + command.args.push(onOption); + + return command; +}; diff --git a/packages/kbn-esql-ast/src/parser/walkers.ts b/packages/kbn-esql-ast/src/parser/walkers.ts index 60d69a17bb1c7..60dfafa6e3c89 100644 --- a/packages/kbn-esql-ast/src/parser/walkers.ts +++ b/packages/kbn-esql-ast/src/parser/walkers.ts @@ -267,7 +267,7 @@ function getComparisonName(ctx: ComparisonOperatorContext) { return (ctx.EQ() || ctx.NEQ() || ctx.LT() || ctx.LTE() || ctx.GT() || ctx.GTE()).getText() || ''; } -function visitValueExpression(ctx: ValueExpressionContext) { +export function 
visitValueExpression(ctx: ValueExpressionContext) { if (!textExistsAndIsValid(ctx.getText())) { return []; } diff --git a/packages/kbn-esql-ast/src/query/query.ts b/packages/kbn-esql-ast/src/query/query.ts index 66c9fd58df085..638cc4fc17f32 100644 --- a/packages/kbn-esql-ast/src/query/query.ts +++ b/packages/kbn-esql-ast/src/query/query.ts @@ -9,7 +9,7 @@ import type { Token } from 'antlr4'; import { ParseOptions, parse } from '../parser'; -import type { ESQLAstQueryExpression } from '../types'; +import type { ESQLAstQueryExpression, EditorError } from '../types'; import { WrappingPrettyPrinter, WrappingPrettyPrinterOptions, @@ -21,8 +21,9 @@ import { */ export class EsqlQuery { public static readonly fromSrc = (src: string, opts?: ParseOptions): EsqlQuery => { - const { root, tokens } = parse(src, opts); - return new EsqlQuery(root, src, tokens); + const { root, tokens, errors } = parse(src, opts); + + return new EsqlQuery(root, src, tokens, errors); }; constructor( @@ -43,7 +44,12 @@ export class EsqlQuery { * Optional array of ANTLR tokens, in case the query was parsed from a * source code. */ - public readonly tokens: Token[] = [] + public readonly tokens: Token[] = [], + + /** + * Parsing errors. + */ + public readonly errors: EditorError[] = [] ) {} public print(opts?: WrappingPrettyPrinterOptions): string { diff --git a/packages/kbn-esql-ast/src/types.ts b/packages/kbn-esql-ast/src/types.ts index 2a8513fc2ced1..60da69b04989a 100644 --- a/packages/kbn-esql-ast/src/types.ts +++ b/packages/kbn-esql-ast/src/types.ts @@ -76,6 +76,13 @@ export interface ESQLAstNodeFormatting { export interface ESQLCommand extends ESQLAstBaseItem { type: 'command'; + + /** + * The subtype of the command. For example, the `JOIN` command can be: (1) + * LOOKUP JOIN, (2) LEFT JOIN, (3) RIGHT JOIN. 
+ */ + commandType?: string; + args: ESQLAstItem[]; } diff --git a/packages/kbn-esql-validation-autocomplete/src/validation/validation.ts b/packages/kbn-esql-validation-autocomplete/src/validation/validation.ts index b3076d107f850..ae8ab41da157e 100644 --- a/packages/kbn-esql-validation-autocomplete/src/validation/validation.ts +++ b/packages/kbn-esql-validation-autocomplete/src/validation/validation.ts @@ -1117,7 +1117,11 @@ function validateCommand( // do not check the command exists, the grammar is already picking that up const commandDef = getCommandDefinition(command.name); - if (commandDef?.validate) { + if (!commandDef) { + return messages; + } + + if (commandDef.validate) { messages.push(...commandDef.validate(command)); }