Skip to content

Commit

Permalink
ES|QL: better validation for GROK patterns (elastic#110574)
Browse files Browse the repository at this point in the history
  • Loading branch information
luigidellaquila authored Jul 9, 2024
1 parent 7e0222d commit bfc32a2
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 6 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/110574.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 110574
summary: "ES|QL: better validation for GROK patterns"
area: ES|QL
type: bug
issues:
- 110533
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,13 @@ public enum Cap {
/**
* Fix for union-types when aggregating over an inline conversion with casting operator. Done in #110476.
*/
UNION_TYPES_AGG_CAST;
UNION_TYPES_AGG_CAST,

/**
* Fix to GROK validation in case of multiple fields with same name and different types
* https://github.com/elastic/elasticsearch/issues/110533
*/
GROK_VALIDATION;

private final boolean snapshotOnly;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,30 @@ public PlanFactory visitEvalCommand(EsqlBaseParser.EvalCommandContext ctx) {
@Override
public PlanFactory visitGrokCommand(EsqlBaseParser.GrokCommandContext ctx) {
return p -> {
Source source = source(ctx);
String pattern = visitString(ctx.string()).fold().toString();
Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), Grok.pattern(source(ctx), pattern));
Grok.Parser grokParser = Grok.pattern(source, pattern);
validateGrokPattern(source, grokParser, pattern);
Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), grokParser);
return result;
};
}

private void validateGrokPattern(Source source, Grok.Parser grokParser, String pattern) {
Map<String, DataType> definedAttributes = new HashMap<>();
for (Attribute field : grokParser.extractedFields()) {
String name = field.name();
DataType type = field.dataType();
DataType prev = definedAttributes.put(name, type);
if (prev != null) {
throw new ParsingException(
source,
"Invalid GROK pattern [" + pattern + "]: the attribute [" + name + "] is defined multiple times with different types"
);
}
}
}

@Override
public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) {
return p -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public class Grok extends RegexExtract {

public record Parser(String pattern, org.elasticsearch.grok.Grok grok) {

private List<Attribute> extractedFields() {
public List<Attribute> extractedFields() {
return grok.captureConfig()
.stream()
.sorted(Comparator.comparing(GrokCaptureConfig::name))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -758,15 +758,27 @@ public void testDissectPattern() {
public void testGrokPattern() {
LogicalPlan cmd = processingCommand("grok a \"%{WORD:foo}\"");
assertEquals(Grok.class, cmd.getClass());
Grok dissect = (Grok) cmd;
assertEquals("%{WORD:foo}", dissect.parser().pattern());
assertEquals(List.of(referenceAttribute("foo", KEYWORD)), dissect.extractedFields());
Grok grok = (Grok) cmd;
assertEquals("%{WORD:foo}", grok.parser().pattern());
assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields());

ParsingException pe = expectThrows(ParsingException.class, () -> statement("row a = \"foo bar\" | grok a \"%{_invalid_:x}\""));
assertThat(
pe.getMessage(),
containsString("Invalid pattern [%{_invalid_:x}] for grok: Unable to find pattern [_invalid_] in Grok's pattern dictionary")
);

cmd = processingCommand("grok a \"%{WORD:foo} %{WORD:foo}\"");
assertEquals(Grok.class, cmd.getClass());
grok = (Grok) cmd;
assertEquals("%{WORD:foo} %{WORD:foo}", grok.parser().pattern());
assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields());

expectError(
"row a = \"foo bar\" | GROK a \"%{NUMBER:foo} %{WORD:foo}\"",
"line 1:22: Invalid GROK pattern [%{NUMBER:foo} %{WORD:foo}]:"
+ " the attribute [foo] is defined multiple times with different types"
);
}

public void testLikeRLike() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,38 @@
- match: { values.0.2: [1, 2] }
- match: { values.0.3: [1, 2] }
- match: { values.0.4: [1.1, 2.2] }


---
"grok with duplicate names and different types #110533":
- requires:
test_runner_features: [capabilities]
capabilities:
- method: POST
path: /_query
parameters: []
capabilities: [grok_validation]
reason: "fixed grok validation with patterns containing the same attribute multiple times with different types"
- do:
indices.create:
index: test_grok
body:
mappings:
properties:
first_name :
type : keyword
last_name:
type: keyword

- do:
bulk:
refresh: true
body:
- { "index": { "_index": "test_grok" } }
- { "first_name": "Georgi", "last_name":"Facello" }

- do:
catch: '/Invalid GROK pattern \[%\{NUMBER:foo\} %\{WORD:foo\}\]: the attribute \[foo\] is defined multiple times with different types/'
esql.query:
body:
query: 'FROM test_grok | KEEP name | WHERE last_name == "Facello" | EVAL name = concat("1 ", last_name) | GROK name "%{NUMBER:foo} %{WORD:foo}"'

0 comments on commit bfc32a2

Please sign in to comment.