From 6e69f708b7a7212a0473977767eb3feee258b216 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Wed, 18 Oct 2023 02:18:57 +0800 Subject: [PATCH] Fix dissect ingest processor parsing empty brackets failed (#9255) * Fix dissect ingest processor parsing empty brackets failed Signed-off-by: Gao Binlong * Modify change log Signed-off-by: Gao Binlong * Modify change log Signed-off-by: Gao Binlong * Add assertion Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong Signed-off-by: Daniel (dB.) Doubrovkine Co-authored-by: Daniel (dB.) Doubrovkine Signed-off-by: Shivansh Arora --- CHANGELOG.md | 1 + .../org/opensearch/dissect/DissectParser.java | 11 +++++- .../ingest/common/DissectProcessorTests.java | 24 +++++++++++++ .../test/ingest/200_dissect_processor.yml | 35 +++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a02b5f4e7242e..284244de08829 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -111,6 +111,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Removed ### Fixed +- Fix failure in dissect ingest processor parsing empty brackets ([#9255](https://github.com/opensearch-project/OpenSearch/pull/9255)) - Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) - Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([10370](https://github.com/opensearch-project/OpenSearch/pull/10370)) - Fix some test methods in SimulatePipelineRequestParsingTests never run and fix test failure ([#10496](https://github.com/opensearch-project/OpenSearch/pull/10496)) diff --git a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java index b6dc0ceb1028f..828d4b7de450e 100644 --- a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java +++ 
b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java @@ -231,7 +231,10 @@ public Map parse(String inputString) { int lookAheadMatches; // start walking the input string byte by byte, look ahead for matches where needed // if a match is found jump forward to the end of the match - for (; i < input.length; i++) { + while (i < input.length) { + // start is only used to record the value of i + int start = i; + lookAheadMatches = 0; // potential match between delimiter and input string if (delimiter.length > 0 && input[i] == delimiter[0]) { @@ -283,8 +286,14 @@ public Map parse(String inputString) { delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); // i is always one byte after the last found delimiter, aka the start of the next value valueStart = i; + } else { + i++; } + } else { + i++; } + // i should change anyway + assert (i != start); } // the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) // and there is no trailing delimiter diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java index ca0c0df40f009..e42a1147825d1 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java @@ -155,4 +155,28 @@ public void testNullValueWithOutIgnoreMissing() { IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument)); } + + public void testMatchEmptyBrackets() { + IngestDocument ingestDocument = new IngestDocument( + "_index", + "_id", + null, + null, + null, + Collections.singletonMap("message", "[foo],[bar],[]") + ); + DissectProcessor dissectProcessor = new DissectProcessor("", null, "message", 
"[%{a}],[%{b}],[%{c}]", "", true); + dissectProcessor.execute(ingestDocument); + assertEquals("foo", ingestDocument.getFieldValue("a", String.class)); + assertEquals("bar", ingestDocument.getFieldValue("b", String.class)); + assertEquals("", ingestDocument.getFieldValue("c", String.class)); + + ingestDocument = new IngestDocument("_index", "_id", null, null, null, Collections.singletonMap("message", "{}{}{}{baz}")); + dissectProcessor = new DissectProcessor("", null, "message", "{%{a}}{%{b}}{%{c}}{%{d}}", "", true); + dissectProcessor.execute(ingestDocument); + assertEquals("", ingestDocument.getFieldValue("a", String.class)); + assertEquals("", ingestDocument.getFieldValue("b", String.class)); + assertEquals("", ingestDocument.getFieldValue("c", String.class)); + assertEquals("baz", ingestDocument.getFieldValue("d", String.class)); + } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml index 916a7fe656cc2..d90e5fbf2362b 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml @@ -84,3 +84,38 @@ teardown: } ] } + +--- +"Test dissect processor can match empty brackets": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "dissect" : { + "field" : "message", + "pattern" : "[%{a}][%{b}][%{c}]" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: {message: "[foo][bar][]"} + + - do: + get: + index: test + id: 1 + - match: { _source.message: "[foo][bar][]" } + - match: { _source.a: "foo" } + - match: { _source.b: "bar" } + - match: { _source.c: "" }