Skip to content

Commit

Permalink
[ES|QL] Finalize string parsing (unquote and unescape strings) (elast…
Browse files Browse the repository at this point in the history
…ic#203610)

## Summary

Closes elastic#203445

- Un-escapes and un-quotes all strings when parsing. So the strings can
be compared and string nodes can be constructed using `Builder` and then
correctly formatted by pretty-printer.
- Introduces `valueUnqoted` field to string literal nodes.
- Refactors `GROK` and `DISSECT` command parsing into their separate
files.


### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
  • Loading branch information
vadimkibana authored Dec 16, 2024
1 parent 7370cc7 commit 64630ab
Show file tree
Hide file tree
Showing 14 changed files with 340 additions and 143 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,13 @@ describe('literal', () => {
const node = Builder.expression.literal.string('abc');
const text = BasicPrettyPrinter.expression(node);

expect(text).toBe('"""abc"""');
expect(text).toBe('"abc"');
expect(node).toMatchObject({
type: 'literal',
literalType: 'keyword',
name: '"""abc"""',
value: '"""abc"""',
name: '"abc"',
value: '"abc"',
valueUnquoted: 'abc',
});
});
});
Expand Down Expand Up @@ -260,7 +261,7 @@ describe('literal', () => {
});
const text = BasicPrettyPrinter.expression(node);

expect(text).toBe('["""a""", """b""", """c"""]');
expect(text).toBe('["a", "b", "c"]');
});

test('integer list', () => {
Expand Down
54 changes: 33 additions & 21 deletions src/platform/packages/shared/kbn-esql-ast/src/builder/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ import {
ESQLParamLiteral,
ESQLFunction,
ESQLAstItem,
ESQLStringLiteral,
ESQLBinaryExpression,
ESQLUnaryExpression,
ESQLTimeInterval,
ESQLStringLiteral,
ESQLBooleanLiteral,
ESQLNullLiteral,
} from '../types';
Expand Down Expand Up @@ -368,26 +368,6 @@ export namespace Builder {
) as ESQLDecimalLiteral;
};

export const string = (
value: string,
template?: Omit<AstNodeTemplate<ESQLStringLiteral>, 'name' | 'literalType'>,
fromParser?: Partial<AstNodeParserFields>
): ESQLStringLiteral => {
// TODO: Once (https://github.com/elastic/kibana/issues/203445) do not use
// triple quotes and escape the string.
const quotedValue = '"""' + value + '"""';
const node: ESQLStringLiteral = {
...template,
...Builder.parserFields(fromParser),
type: 'literal',
literalType: 'keyword',
name: quotedValue,
value: quotedValue,
};

return node;
};

/**
* Constructs "time interval" literal node.
*
Expand All @@ -407,6 +387,38 @@ export namespace Builder {
};
};

export const string = (
valueUnquoted: string,
template?: Omit<
AstNodeTemplate<ESQLStringLiteral>,
'name' | 'literalType' | 'value' | 'valueUnquoted'
> &
Partial<Pick<ESQLStringLiteral, 'name'>>,
fromParser?: Partial<AstNodeParserFields>
): ESQLStringLiteral => {
const value =
'"' +
valueUnquoted
.replace(/\\/g, '\\\\')
.replace(/"/g, '\\"')
.replace(/\n/g, '\\n')
.replace(/\r/g, '\\r')
.replace(/\t/g, '\\t') +
'"';
const name = template?.name ?? value;
const node: ESQLStringLiteral = {
...template,
...Builder.parserFields(fromParser),
type: 'literal',
literalType: 'keyword',
name,
value,
valueUnquoted,
};

return node;
};

export const list = (
template: Omit<AstNodeTemplate<ESQLList>, 'name'>,
fromParser?: Partial<AstNodeParserFields>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,15 +286,19 @@ describe('commands', () => {
},
{
type: 'literal',
value: '"b"',
literalType: 'keyword',
name: '"b"',
valueUnquoted: 'b',
},
{
type: 'option',
name: 'append_separator',
args: [
{
type: 'literal',
value: '"c"',
literalType: 'keyword',
name: '"c"',
valueUnquoted: 'c',
},
],
},
Expand All @@ -303,6 +307,31 @@ describe('commands', () => {
]);
});

it('DISSECT (no options)', () => {
const query = 'FROM index | DISSECT a "b"';
const { ast } = parse(query);

expect(ast).toMatchObject([
{},
{
type: 'command',
name: 'dissect',
args: [
{
type: 'column',
name: 'a',
},
{
type: 'literal',
literalType: 'keyword',
name: '"b"',
valueUnquoted: 'b',
},
],
},
]);
});

it('GROK', () => {
const query = 'FROM index | GROK a "b"';
const { ast } = parse(query);
Expand All @@ -319,7 +348,9 @@ describe('commands', () => {
},
{
type: 'literal',
value: '"b"',
literalType: 'keyword',
name: '"b"',
valueUnquoted: 'b',
},
],
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ describe('literal expression', () => {

it('numeric expression captures "value", and "name" fields', () => {
const text = 'ROW 1';
const { ast } = parse(text);
const literal = ast[0].args[0] as ESQLLiteral;
const { root } = parse(text);
const literal = root.commands[0].args[0] as ESQLLiteral;

expect(literal).toMatchObject({
type: 'literal',
Expand All @@ -39,9 +39,9 @@ describe('literal expression', () => {

it('doubles vs integers', () => {
const text = 'ROW a(1.0, 1)';
const { ast } = parse(text);
const { root } = parse(text);

expect(ast[0]).toMatchObject({
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
Expand All @@ -61,54 +61,117 @@ describe('literal expression', () => {
});
});

// TODO: Un-skip once string parsing fixed: https://github.com/elastic/kibana/issues/203445
it.skip('single-quoted string', () => {
const text = 'ROW "abc"';
const { root } = parse(text);
describe('string', () => {
describe('single quoted', () => {
it('empty string', () => {
const text = 'ROW "", 1';
const { root } = parse(text);

expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
value: 'abc',
},
],
});
});
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '""',
valueUnquoted: '',
},
{},
],
});
});

// TODO: Un-skip once string parsing fixed: https://github.com/elastic/kibana/issues/203445
it.skip('unescapes characters', () => {
const text = 'ROW "a\\nbc"';
const { root } = parse(text);
it('short string', () => {
const text = 'ROW "abc", 1';
const { root } = parse(text);

expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
value: 'a\nbc',
},
],
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"abc"',
valueUnquoted: 'abc',
},
{},
],
});
});

it('escaped characters', () => {
const text = 'ROW "a\\nb\\tc\\rd\\\\e\\"f", 1';
const { root } = parse(text);

expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"a\\nb\\tc\\rd\\\\e\\"f"',
valueUnquoted: 'a\nb\tc\rd\\e"f',
},
{},
],
});
});
});
});

// TODO: Un-skip once string parsing fixed: https://github.com/elastic/kibana/issues/203445
it.skip('triple-quoted string', () => {
const text = 'ROW """abc"""';
const { root } = parse(text);
describe('triple quoted', () => {
it('empty string', () => {
const text = 'ROW """""", 1';
const { root } = parse(text);

expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
value: 'abc',
},
],
expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '""""""',
valueUnquoted: '',
},
{},
],
});
});

it('short string', () => {
const text = 'ROW """abc""", 1';
const { root } = parse(text);

expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"""abc"""',
valueUnquoted: 'abc',
},
{},
],
});
});

it('characters are not escaped', () => {
const text = 'ROW """a\\nb\\c\\"d""", 1';
const { root } = parse(text);

expect(root.commands[0]).toMatchObject({
type: 'command',
args: [
{
type: 'literal',
literalType: 'keyword',
name: '"""a\\nb\\c\\"d"""',
valueUnquoted: 'a\\nb\\c\\"d',
},
{},
],
});
});
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ import {
visitByOption,
collectAllColumnIdentifiers,
visitRenameClauses,
visitDissect,
visitGrok,
collectBooleanExpression,
visitOrderExpressions,
getPolicyName,
Expand All @@ -60,6 +58,8 @@ import {
} from './walkers';
import type { ESQLAst, ESQLAstMetricsCommand } from '../types';
import { createJoinCommand } from './factories/join';
import { createDissectCommand } from './factories/dissect';
import { createGrokCommand } from './factories/grok';

export class ESQLAstBuilderListener implements ESQLParserListener {
private ast: ESQLAst = [];
Expand Down Expand Up @@ -262,19 +262,19 @@ export class ESQLAstBuilderListener implements ESQLParserListener {
* @param ctx the parse tree
*/
exitDissectCommand(ctx: DissectCommandContext) {
const command = createCommand('dissect', ctx);
const command = createDissectCommand(ctx);

this.ast.push(command);
command.args.push(...visitDissect(ctx));
}

/**
* Exit a parse tree produced by `esql_parser.grokCommand`.
* @param ctx the parse tree
*/
exitGrokCommand(ctx: GrokCommandContext) {
const command = createCommand('grok', ctx);
const command = createGrokCommand(ctx);

this.ast.push(command);
command.args.push(...visitGrok(ctx));
}

/**
Expand Down
Loading

0 comments on commit 64630ab

Please sign in to comment.