From c8ae7f05a7823c480c5e9b2dccae5ca26ad0908b Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Fri, 12 Jan 2024 16:50:30 -0600 Subject: [PATCH 01/11] Fix log appender in InsecureSettingTests (#11866) The appender was being added to the logger before the appender was started. This can lead to logger errors with concurrently running tests because the logger is static state shared across the JVM. See #10799. I've also added a removeAppender call that was missing from LoggersTests, but that is mostly hygiene and would not lead to failures. Signed-off-by: Andrew Ross --- .../common/logging/LoggersTests.java | 63 ++++++++++--------- .../common/settings/InsecureSettingTests.java | 7 ++- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/server/src/test/java/org/opensearch/common/logging/LoggersTests.java b/server/src/test/java/org/opensearch/common/logging/LoggersTests.java index 17c4f9d0fe13d..d9db57aef15b6 100644 --- a/server/src/test/java/org/opensearch/common/logging/LoggersTests.java +++ b/server/src/test/java/org/opensearch/common/logging/LoggersTests.java @@ -53,40 +53,45 @@ public void testParameterizedMessageLambda() throws Exception { appender.start(); final Logger testLogger = LogManager.getLogger(LoggersTests.class); Loggers.addAppender(testLogger, appender); - Loggers.setLevel(testLogger, Level.TRACE); + try { + Loggers.setLevel(testLogger, Level.TRACE); - Throwable ex = randomException(); - testLogger.error(() -> new ParameterizedMessage("an error message"), ex); - assertThat(appender.lastEvent.getLevel(), equalTo(Level.ERROR)); - assertThat(appender.lastEvent.getThrown(), equalTo(ex)); - assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("an error message")); + Throwable ex = randomException(); + testLogger.error(() -> new ParameterizedMessage("an error message"), ex); + assertThat(appender.lastEvent.getLevel(), equalTo(Level.ERROR)); + assertThat(appender.lastEvent.getThrown(), equalTo(ex)); + assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("an error message")); - ex = randomException(); - testLogger.warn(() -> new ParameterizedMessage("a warn message: [{}]", "long gc"), ex); - assertThat(appender.lastEvent.getLevel(), equalTo(Level.WARN)); - assertThat(appender.lastEvent.getThrown(), equalTo(ex)); - assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("a warn message: [long gc]")); - assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining("long gc")); + ex = randomException(); + testLogger.warn(() -> new ParameterizedMessage("a warn message: [{}]", "long gc"), ex); + assertThat(appender.lastEvent.getLevel(), equalTo(Level.WARN)); + assertThat(appender.lastEvent.getThrown(), equalTo(ex)); + assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("a warn message: [long gc]")); + assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining("long gc")); - testLogger.info(() -> new ParameterizedMessage("an info message a=[{}], b=[{}], c=[{}]", 1, 2, 3)); - assertThat(appender.lastEvent.getLevel(), equalTo(Level.INFO)); - assertThat(appender.lastEvent.getThrown(), nullValue()); - assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("an info message a=[1], b=[2], c=[3]")); - assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining(1, 2, 3)); + testLogger.info(() -> new ParameterizedMessage("an info message a=[{}], b=[{}], c=[{}]", 1, 2, 3)); + assertThat(appender.lastEvent.getLevel(), equalTo(Level.INFO)); + assertThat(appender.lastEvent.getThrown(), nullValue()); + assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("an info message a=[1], b=[2], c=[3]")); + assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining(1, 2, 3)); - ex = randomException(); - testLogger.debug(() -> new ParameterizedMessage("a debug message options = {}", Arrays.asList("yes", "no")), ex); - assertThat(appender.lastEvent.getLevel(), equalTo(Level.DEBUG)); - assertThat(appender.lastEvent.getThrown(), equalTo(ex)); - assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("a debug message options = [yes, no]")); - assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining(Arrays.asList("yes", "no"))); + ex = randomException(); + testLogger.debug(() -> new ParameterizedMessage("a debug message options = {}", Arrays.asList("yes", "no")), ex); + assertThat(appender.lastEvent.getLevel(), equalTo(Level.DEBUG)); + assertThat(appender.lastEvent.getThrown(), equalTo(ex)); + assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("a debug message options = [yes, no]")); + assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining(Arrays.asList("yes", "no"))); - ex = randomException(); - testLogger.trace(() -> new ParameterizedMessage("a trace message; element = [{}]", new Object[] { null }), ex); - assertThat(appender.lastEvent.getLevel(), equalTo(Level.TRACE)); - assertThat(appender.lastEvent.getThrown(), equalTo(ex)); - assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("a trace message; element = [null]")); - assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining(new Object[] { null })); + ex = randomException(); + testLogger.trace(() -> new ParameterizedMessage("a trace message; element = [{}]", new Object[] { null }), ex); + assertThat(appender.lastEvent.getLevel(), equalTo(Level.TRACE)); + assertThat(appender.lastEvent.getThrown(), equalTo(ex)); + assertThat(appender.lastParameterizedMessage().getFormattedMessage(), equalTo("a trace message; element = [null]")); + assertThat(appender.lastParameterizedMessage().getParameters(), arrayContaining(new Object[] { null })); + } finally { + Loggers.removeAppender(testLogger, appender); + appender.stop(); + } } private Throwable randomException() { diff --git a/server/src/test/java/org/opensearch/common/settings/InsecureSettingTests.java b/server/src/test/java/org/opensearch/common/settings/InsecureSettingTests.java index b256ab956f963..9358013826a1c 100644 --- a/server/src/test/java/org/opensearch/common/settings/InsecureSettingTests.java +++ b/server/src/test/java/org/opensearch/common/settings/InsecureSettingTests.java @@ -25,7 +25,7 @@ public class InsecureSettingTests extends OpenSearchTestCase { private List rootLogMsgs = new ArrayList<>(); private AbstractAppender rootAppender; - protected void assertSettingWarning() { + private void assertSettingWarning() { assertWarnings( "[setting.name] setting was deprecated in OpenSearch and will be removed in a future release! See the breaking changes documentation for the next major version." ); @@ -50,13 +50,14 @@ public void append(LogEvent event) { InsecureSettingTests.this.rootLogMsgs.add(message); } }; - Loggers.addAppender(LogManager.getRootLogger(), rootAppender); rootAppender.start(); + Loggers.addAppender(LogManager.getLogger(SecureSetting.class), rootAppender); } @After public void removeInsecureSettingsAppender() { - Loggers.removeAppender(LogManager.getRootLogger(), rootAppender); + Loggers.removeAppender(LogManager.getLogger(SecureSetting.class), rootAppender); + rootAppender.stop(); } public void testShouldRaiseExceptionByDefault() { From 988dea88a0aaae192db602f35f5cd6d714d07fce Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Sat, 13 Jan 2024 22:16:44 +0800 Subject: [PATCH 02/11] Update supported version for must_exist parameter in update aliases API (#11872) * Update supported version for must_exist parameter in update aliases API Signed-off-by: Gao Binlong * Modify change log Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../40_remove_with_must_exist.yml | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10338f6646053..a4d42116fa9af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,6 +83,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Don't over-allocate in HeapBufferedAsyncEntityConsumer in order to consume the response ([#9993](https://github.com/opensearch-project/OpenSearch/pull/9993)) - Update supported version for max_shard_size parameter in Shrink API ([#11439](https://github.com/opensearch-project/OpenSearch/pull/11439)) - Fix typo in API annotation check message ([11836](https://github.com/opensearch-project/OpenSearch/pull/11836)) +- Update supported version for must_exist parameter in update aliases API ([#11872](https://github.com/opensearch-project/OpenSearch/pull/11872)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.update_aliases/40_remove_with_must_exist.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.update_aliases/40_remove_with_must_exist.yml index dbf6a4fad3295..b9457f0290897 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.update_aliases/40_remove_with_must_exist.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.update_aliases/40_remove_with_must_exist.yml @@ -1,8 +1,8 @@ --- "Throw aliases missing exception when removing non-existing alias with setting must_exist to true": - skip: - version: " - 2.99.99" - reason: "introduced in 3.0" + version: " - 2.11.99" + reason: "introduced in 2.12.0" - do: indices.create: @@ -47,8 +47,8 @@ --- "Throw aliases missing exception when all of the specified aliases are non-existing": - skip: - version: " - 2.99.99" - reason: "introduced in 3.0" + version: " - 2.11.99" + reason: "introduced in 2.12.0" - do: indices.create: @@ -81,8 +81,8 @@ --- "Remove successfully when some specified aliases are non-existing": - skip: - version: " - 2.99.99" - reason: "introduced in 3.0" + version: " - 2.11.99" + reason: "introduced in 2.12.0" - do: indices.create: @@ -116,8 +116,8 @@ --- "Remove silently when all of the specified aliases are non-existing and must_exist is false": - skip: - version: " - 2.99.99" - reason: "introduced in 3.0" + version: " - 2.11.99" + reason: "introduced in 2.12.0" - do: indices.create: From ac4aca210f799237922b15c4c55faeccd8cb9f7c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 09:16:42 -0500 Subject: [PATCH 03/11] Bump lycheeverse/lychee-action from 1.9.0 to 1.9.1 (#11887) * Bump lycheeverse/lychee-action from 1.9.0 to 1.9.1 Bumps [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action) from 1.9.0 to 1.9.1. - [Release notes](https://github.com/lycheeverse/lychee-action/releases) - [Commits](https://github.com/lycheeverse/lychee-action/compare/v1.9.0...v1.9.1) --- updated-dependencies: - dependency-name: lycheeverse/lychee-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- .github/workflows/links.yml | 2 +- CHANGELOG.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/links.yml b/.github/workflows/links.yml index 2714d45bd108f..61962c91b4903 100644 --- a/.github/workflows/links.yml +++ b/.github/workflows/links.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/checkout@v4 - name: lychee Link Checker id: lychee - uses: lycheeverse/lychee-action@v1.9.0 + uses: lycheeverse/lychee-action@v1.9.1 with: args: --accept=200,403,429 --exclude-mail **/*.html **/*.md **/*.txt **/*.json --exclude-file .lychee.excludes fail: true diff --git a/CHANGELOG.md b/CHANGELOG.md index a4d42116fa9af..8b6840b62ca5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -165,7 +165,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.commons:commons-lang3` from 3.13.0 to 3.14.0 ([#11691](https://github.com/opensearch-project/OpenSearch/pull/11691)) - Bump `com.maxmind.db:maxmind-db` from 3.0.0 to 3.1.0 ([#11693](https://github.com/opensearch-project/OpenSearch/pull/11693)) - Bump `net.java.dev.jna:jna` from 5.13.0 to 5.14.0 ([#11798](https://github.com/opensearch-project/OpenSearch/pull/11798)) -- Bump `lycheeverse/lychee-action` from 1.8.0 to 1.9.0 ([#11795](https://github.com/opensearch-project/OpenSearch/pull/11795)) +- Bump `lycheeverse/lychee-action` from 1.8.0 to 1.9.1 ([#11795](https://github.com/opensearch-project/OpenSearch/pull/11795), [#11887](https://github.com/opensearch-project/OpenSearch/pull/11887)) - Bump `Lucene` from 9.8.0 to 9.9.1 ([#11421](https://github.com/opensearch-project/OpenSearch/pull/11421)) ### Changed From a90dd86646d64f76f9e08b11455c522b202d10ee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 09:23:18 -0500 Subject: [PATCH 04/11] Bump com.networknt:json-schema-validator from 1.0.86 to 1.1.0 (#11886) * Bump com.networknt:json-schema-validator from 1.0.86 to 1.1.0 Bumps [com.networknt:json-schema-validator](https://github.com/networknt/json-schema-validator) from 1.0.86 to 1.1.0. - [Release notes](https://github.com/networknt/json-schema-validator/releases) - [Changelog](https://github.com/networknt/json-schema-validator/blob/master/CHANGELOG.md) - [Commits](https://github.com/networknt/json-schema-validator/compare/1.0.86...1.1.0) --- updated-dependencies: - dependency-name: com.networknt:json-schema-validator dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + buildSrc/build.gradle | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6840b62ca5c..6d2558ef5743f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -167,6 +167,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `net.java.dev.jna:jna` from 5.13.0 to 5.14.0 ([#11798](https://github.com/opensearch-project/OpenSearch/pull/11798)) - Bump `lycheeverse/lychee-action` from 1.8.0 to 1.9.1 ([#11795](https://github.com/opensearch-project/OpenSearch/pull/11795), [#11887](https://github.com/opensearch-project/OpenSearch/pull/11887)) - Bump `Lucene` from 9.8.0 to 9.9.1 ([#11421](https://github.com/opensearch-project/OpenSearch/pull/11421)) +- Bump `com.networknt:json-schema-validator` from 1.0.86 to 1.1.0 ([#11886](https://github.com/opensearch-project/OpenSearch/pull/11886)) ### Changed - Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840)) diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 3c846b48549fb..1cb21acd14af7 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -118,7 +118,7 @@ dependencies { api 'com.avast.gradle:gradle-docker-compose-plugin:0.17.6' api "org.yaml:snakeyaml:${props.getProperty('snakeyaml')}" api 'org.apache.maven:maven-model:3.9.6' - api 'com.networknt:json-schema-validator:1.0.86' + api 'com.networknt:json-schema-validator:1.1.0' api 'org.jruby.jcodings:jcodings:1.0.58' api 'org.jruby.joni:joni:2.2.1' api "com.fasterxml.jackson.core:jackson-databind:${props.getProperty('jackson_databind')}" From c132db950548ee99127bc92be8d0f388cc0f4a51 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Mon, 15 Jan 2024 13:01:29 -0500 Subject: [PATCH 05/11] Bump OpenTelemetry from 1.32.0 to 1.34.1 (#11891) Signed-off-by: Andriy Redko --- CHANGELOG.md | 1 + buildSrc/version.properties | 2 +- .../telemetry-otel/licenses/opentelemetry-api-1.32.0.jar.sha1 | 1 - .../telemetry-otel/licenses/opentelemetry-api-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-context-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-context-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-exporter-common-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-exporter-common-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-exporter-logging-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-exporter-logging-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-exporter-otlp-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-exporter-otlp-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-exporter-otlp-common-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-exporter-otlp-common-1.34.1.jar.sha1 | 1 + .../opentelemetry-exporter-sender-okhttp-1.32.0.jar.sha1 | 1 - .../opentelemetry-exporter-sender-okhttp-1.34.1.jar.sha1 | 1 + .../opentelemetry-extension-incubator-1.32.0-alpha.jar.sha1 | 1 - .../opentelemetry-extension-incubator-1.34.1-alpha.jar.sha1 | 1 + .../telemetry-otel/licenses/opentelemetry-sdk-1.32.0.jar.sha1 | 1 - .../telemetry-otel/licenses/opentelemetry-sdk-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-sdk-common-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-sdk-common-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-sdk-logs-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-sdk-logs-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-sdk-metrics-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-sdk-metrics-1.34.1.jar.sha1 | 1 + .../licenses/opentelemetry-sdk-trace-1.32.0.jar.sha1 | 1 - .../licenses/opentelemetry-sdk-trace-1.34.1.jar.sha1 | 1 + 28 files changed, 15 insertions(+), 14 deletions(-) delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-api-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-api-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-context-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-context-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.32.0-alpha.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.34.1-alpha.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.34.1.jar.sha1 delete mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.32.0.jar.sha1 create mode 100644 plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.34.1.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d2558ef5743f..26d324839ae54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -126,6 +126,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) - Add match_only_text field that is optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) - Introduce new feature flag "WRITEABLE_REMOTE_INDEX" to gate the writeable remote index functionality ([#11717](https://github.com/opensearch-project/OpenSearch/pull/11170)) +- Bump OpenTelemetry from 1.32.0 to 1.34.1 ([#11891](https://github.com/opensearch-project/OpenSearch/pull/11891)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 3813750507f18..dd7f2e1eaabf0 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -70,5 +70,5 @@ jzlib = 1.1.3 resteasy = 6.2.4.Final # opentelemetry dependencies -opentelemetry = 1.32.0 +opentelemetry = 1.34.1 opentelemetrysemconv = 1.23.1-alpha diff --git a/plugins/telemetry-otel/licenses/opentelemetry-api-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-api-1.32.0.jar.sha1 deleted file mode 100644 index 2c038aad4b934..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-api-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a5c081d8f877225732efe13908f350029c811709 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-api-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-api-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..19f734ca17b79 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-api-1.34.1.jar.sha1 @@ -0,0 +1 @@ +b4aea155f6d6b1032eba85378564431cfd86f562 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-context-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-context-1.32.0.jar.sha1 deleted file mode 100644 index 3243f524432eb..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-context-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c5f8bb68084ea5709a27e935907b1bb49d0bd049 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-context-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-context-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..4c06d28cba199 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-context-1.34.1.jar.sha1 @@ -0,0 +1 @@ +3fcc87f3d810ce49d865ee54b40831559c5e129b \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.32.0.jar.sha1 deleted file mode 100644 index 1d7da47286ae0..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3643061da474061ffa7f2036a58a7a0d40212276 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..91a5c0f715d2b --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.34.1.jar.sha1 @@ -0,0 +1 @@ +19c9a3f52851a1333b648ed83c82d16eb4c64afd \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.32.0.jar.sha1 deleted file mode 100644 index 3fab0e47adcbe..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ab56c7223112fac13a66e3f667c5fc666f4a3707 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..6c05600ae3b08 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.34.1.jar.sha1 @@ -0,0 +1 @@ +b3e74d5b8cf5e60d9965042fa284085bbe081ce3 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.32.0.jar.sha1 deleted file mode 100644 index f93cf7a63bfad..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5752d171cd08ac84f9273258a315bc5f97e1187e \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..f54e6f6893050 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.34.1.jar.sha1 @@ -0,0 +1 @@ +af68f90f0410b7b3a1900d3e0a15ad51b10ffd5b \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.32.0.jar.sha1 deleted file mode 100644 index 2fc33b62aee54..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6b41cd66a385d513b58b6617f20b701435b64abd \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..49d40b36ba85b --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.34.1.jar.sha1 @@ -0,0 +1 @@ +4acab18052267e280d1f9de22c591a5c88bed3a6 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.32.0.jar.sha1 deleted file mode 100644 index 99f758b047aa2..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9346006cead763247a786b5cabf3e1ae3c88eadb \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..a01de2aa84c43 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.34.1.jar.sha1 @@ -0,0 +1 @@ +9f07e1764389e076a36fb7d9e5769e29f3dab950 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.32.0-alpha.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.32.0-alpha.jar.sha1 deleted file mode 100644 index 705a342a684c4..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.32.0-alpha.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fab56e187e3fb3c70c18223184d53a76500114ab \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.34.1-alpha.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.34.1-alpha.jar.sha1 new file mode 100644 index 0000000000000..a5fc8c2059104 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.34.1-alpha.jar.sha1 @@ -0,0 +1 @@ +9201e6a43a0a89515626f7516c7d1b2c349f76df \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.32.0.jar.sha1 deleted file mode 100644 index 31818695cc774..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -504de8cc7dc68e84c8c7c2757522d934e9c50d35 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..cd746f0756e46 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.34.1.jar.sha1 @@ -0,0 +1 @@ +ab49eb621d6d01f0ad2f016989d0352ef18ea9a2 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.32.0.jar.sha1 deleted file mode 100644 index 3cf3080a98bd9..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -454c7a6afab864de9f0c166246f28f16aaa824c1 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..740737dc13efc --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.34.1.jar.sha1 @@ -0,0 +1 @@ +01fcd8bad38d7b8987f6fc93bd7e933240eb727e \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.32.0.jar.sha1 deleted file mode 100644 index 41b0dca07556e..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b054760243906af0a327a8f5bd99adc2826ccd88 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..e6ff3dbafda22 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.34.1.jar.sha1 @@ -0,0 +1 @@ +abad9abc80dfe6118a60413afa161696bbf8dd43 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.32.0.jar.sha1 deleted file mode 100644 index 2f71fd5cc780a..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -bff24f085193e105d4e23e3db27bf81ccb3d830e \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..36ec960c4f7be --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.34.1.jar.sha1 @@ -0,0 +1 @@ +d88407ae475e5f4e859a81e4f61e362e939f7bc2 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.32.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.32.0.jar.sha1 deleted file mode 100644 index f0060b8a0f78f..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.32.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d80ad3210fa890a856a1d04379d134ab44a09501 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.34.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.34.1.jar.sha1 new file mode 100644 index 0000000000000..293b82f206c99 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.34.1.jar.sha1 @@ -0,0 +1 @@ +121a75c2ba9ed8b80f5ff131c2411a5d460f38d0 \ No newline at end of file From 4c283a7c79b20e73ccf3378124893451197b85a3 Mon Sep 17 00:00:00 2001 From: Fahad Shami <48306098+fahadshamiinsta@users.noreply.github.com> Date: Wed, 17 Jan 2024 09:32:55 +1100 Subject: [PATCH 06/11] Support for Google Application Default Credentials (#8394) * fixed conflicts Signed-off-by: fahadshamiinsta * applying spotless Java check Signed-off-by: fahadshamiinsta * added a comment to test helper method Signed-off-by: fahadshamiinsta * added a comment to GoogleApplicationDefaultCredentials class with document reference Signed-off-by: fahadshamiinsta * to rerun gradle checks Signed-off-by: fahadshamiinsta * increasing coverage by adding another test Signed-off-by: fahadshamiinsta * test name change Signed-off-by: fahadshamiinsta * rerun ci Signed-off-by: fahadshamiinsta * rerun ci Signed-off-by: fahadshamiinsta * force push to rerun ci Signed-off-by: fahadshamiinsta * pushing to trigger ci checks Signed-off-by: fahadshamiinsta --------- Signed-off-by: fahadshamiinsta --- CHANGELOG.md | 1 + .../GoogleApplicationDefaultCredentials.java | 33 +++++ .../gcs/GoogleCloudStorageService.java | 20 ++- .../gcs/GoogleCloudStorageServiceTests.java | 132 ++++++++++++++---- 4 files changed, 157 insertions(+), 29 deletions(-) create mode 100644 plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleApplicationDefaultCredentials.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 26d324839ae54..14055f0d8462f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add task completion count in search backpressure stats API ([#10028](https://github.com/opensearch-project/OpenSearch/pull/10028/)) - Deprecate CamelCase `PathHierarchy` tokenizer name in favor to lowercase `path_hierarchy` ([#10894](https://github.com/opensearch-project/OpenSearch/pull/10894)) - Switched to more reliable OpenSearch Lucene snapshot location([#11728](https://github.com/opensearch-project/OpenSearch/pull/11728)) +- Added support for Google Application Default Credentials in repository-gcs ([#8394](https://github.com/opensearch-project/OpenSearch/pull/8394)) ### Deprecated diff --git a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleApplicationDefaultCredentials.java b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleApplicationDefaultCredentials.java new file mode 100644 index 0000000000000..5002ab9a2e704 --- /dev/null +++ b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleApplicationDefaultCredentials.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.gcs; + +import com.google.auth.oauth2.GoogleCredentials; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.IOException; + +/** + * This class facilitates to fetch Application Default Credentials + * see How Application Default Credentials works + */ +public class GoogleApplicationDefaultCredentials { + private static final Logger logger = LogManager.getLogger(GoogleApplicationDefaultCredentials.class); + + public GoogleCredentials get() { + GoogleCredentials credentials = null; + try { + credentials = SocketAccess.doPrivilegedIOException(GoogleCredentials::getApplicationDefault); + } catch (IOException e) { + logger.error("Failed to retrieve \"Application Default Credentials\"", e); + } + return credentials; + } +} diff --git a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java index c9ebb3acaf3e5..83a4146c99b99 100644 --- a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java +++ b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java @@ -36,6 +36,7 @@ import com.google.api.client.http.HttpRequestInitializer; import com.google.api.client.http.HttpTransport; import com.google.api.client.http.javanet.NetHttpTransport; +import com.google.auth.oauth2.GoogleCredentials; import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.ServiceOptions; import com.google.cloud.http.HttpTransportOptions; @@ -70,6 +71,16 @@ public class GoogleCloudStorageService { */ private volatile Map clientCache = emptyMap(); + final private GoogleApplicationDefaultCredentials googleApplicationDefaultCredentials; + + public GoogleCloudStorageService() { + this.googleApplicationDefaultCredentials = new GoogleApplicationDefaultCredentials(); + } + + public GoogleCloudStorageService(GoogleApplicationDefaultCredentials googleApplicationDefaultCredentials) { + this.googleApplicationDefaultCredentials = googleApplicationDefaultCredentials; + } + /** * Refreshes the client settings and clears the client cache. Subsequent calls to * {@code GoogleCloudStorageService#client} will return new clients constructed @@ -213,10 +224,11 @@ StorageOptions createStorageOptions( storageOptionsBuilder.setProjectId(clientSettings.getProjectId()); } if (clientSettings.getCredential() == null) { - logger.warn( - "\"Application Default Credentials\" are not supported out of the box." - + " Additional file system permissions have to be granted to the plugin." - ); + logger.info("\"Application Default Credentials\" will be in use"); + final GoogleCredentials credentials = googleApplicationDefaultCredentials.get(); + if (credentials != null) { + storageOptionsBuilder.setCredentials(credentials); + } } else { ServiceAccountCredentials serviceAccountCredentials = clientSettings.getCredential(); // override token server URI diff --git a/plugins/repository-gcs/src/test/java/org/opensearch/repositories/gcs/GoogleCloudStorageServiceTests.java b/plugins/repository-gcs/src/test/java/org/opensearch/repositories/gcs/GoogleCloudStorageServiceTests.java index a531555debefb..58e412684ed5a 100644 --- a/plugins/repository-gcs/src/test/java/org/opensearch/repositories/gcs/GoogleCloudStorageServiceTests.java +++ b/plugins/repository-gcs/src/test/java/org/opensearch/repositories/gcs/GoogleCloudStorageServiceTests.java @@ -33,8 +33,10 @@ package org.opensearch.repositories.gcs; import com.google.auth.Credentials; +import com.google.auth.oauth2.GoogleCredentials; import com.google.cloud.http.HttpTransportOptions; import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; import org.opensearch.common.settings.MockSecureSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; @@ -42,30 +44,38 @@ import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.test.OpenSearchTestCase; +import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; +import java.io.IOException; +import java.net.Proxy; +import java.net.URI; +import java.net.URISyntaxException; import java.security.KeyPair; import java.security.KeyPairGenerator; import java.util.Base64; import java.util.Locale; import java.util.UUID; +import org.mockito.Mockito; + import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; public class GoogleCloudStorageServiceTests extends OpenSearchTestCase { + final TimeValue connectTimeValue = TimeValue.timeValueNanos(randomIntBetween(0, 2000000)); + final TimeValue readTimeValue = TimeValue.timeValueNanos(randomIntBetween(0, 2000000)); + final String applicationName = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT); + final String endpoint = randomFrom("http://", "https://") + + randomFrom("www.opensearch.org", "www.googleapis.com", "localhost/api", "google.com/oauth") + + ":" + + randomIntBetween(1, 65535); + final String projectIdName = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT); + public void testClientInitializer() throws Exception { final String clientName = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT); - final TimeValue connectTimeValue = TimeValue.timeValueNanos(randomIntBetween(0, 2000000)); - final TimeValue readTimeValue = TimeValue.timeValueNanos(randomIntBetween(0, 2000000)); - final String applicationName = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT); - final String endpoint = randomFrom("http://", "https://") - + randomFrom("www.opensearch.org", "www.googleapis.com", "localhost/api", "google.com/oauth") - + ":" - + randomIntBetween(1, 65535); - final String projectIdName = randomAlphaOfLength(randomIntBetween(1, 10)).toLowerCase(Locale.ROOT); final Settings settings = Settings.builder() .put( GoogleCloudStorageClientSettings.CONNECT_TIMEOUT_SETTING.getConcreteSettingForNamespace(clientName).getKey(), @@ -82,31 +92,35 @@ public void testClientInitializer() throws Exception { .put(GoogleCloudStorageClientSettings.ENDPOINT_SETTING.getConcreteSettingForNamespace(clientName).getKey(), endpoint) .put(GoogleCloudStorageClientSettings.PROJECT_ID_SETTING.getConcreteSettingForNamespace(clientName).getKey(), projectIdName) .build(); - final GoogleCloudStorageService service = new GoogleCloudStorageService(); + GoogleCredentials mockGoogleCredentials = Mockito.mock(GoogleCredentials.class); + GoogleApplicationDefaultCredentials mockDefaultCredentials = Mockito.mock(GoogleApplicationDefaultCredentials.class); + Mockito.when(mockDefaultCredentials.get()).thenReturn(mockGoogleCredentials); + + final GoogleCloudStorageService service = new GoogleCloudStorageService(mockDefaultCredentials); service.refreshAndClearCache(GoogleCloudStorageClientSettings.load(settings)); GoogleCloudStorageOperationsStats statsCollector = new GoogleCloudStorageOperationsStats("bucket"); final IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> service.client("another_client", "repo", statsCollector) ); - assertThat(e.getMessage(), Matchers.startsWith("Unknown client name")); + MatcherAssert.assertThat(e.getMessage(), Matchers.startsWith("Unknown client name")); assertSettingDeprecationsAndWarnings( new Setting[] { GoogleCloudStorageClientSettings.APPLICATION_NAME_SETTING.getConcreteSettingForNamespace(clientName) } ); final Storage storage = service.client(clientName, "repo", statsCollector); - assertThat(storage.getOptions().getApplicationName(), Matchers.containsString(applicationName)); - assertThat(storage.getOptions().getHost(), Matchers.is(endpoint)); - assertThat(storage.getOptions().getProjectId(), Matchers.is(projectIdName)); - assertThat(storage.getOptions().getTransportOptions(), Matchers.instanceOf(HttpTransportOptions.class)); - assertThat( + MatcherAssert.assertThat(storage.getOptions().getApplicationName(), Matchers.containsString(applicationName)); + MatcherAssert.assertThat(storage.getOptions().getHost(), Matchers.is(endpoint)); + MatcherAssert.assertThat(storage.getOptions().getProjectId(), Matchers.is(projectIdName)); + MatcherAssert.assertThat(storage.getOptions().getTransportOptions(), Matchers.instanceOf(HttpTransportOptions.class)); + MatcherAssert.assertThat( ((HttpTransportOptions) storage.getOptions().getTransportOptions()).getConnectTimeout(), Matchers.is((int) connectTimeValue.millis()) ); - assertThat( + MatcherAssert.assertThat( ((HttpTransportOptions) storage.getOptions().getTransportOptions()).getReadTimeout(), Matchers.is((int) readTimeValue.millis()) ); - assertThat(storage.getOptions().getCredentials(), Matchers.nullValue(Credentials.class)); + MatcherAssert.assertThat(storage.getOptions().getCredentials(), Matchers.instanceOf(Credentials.class)); } public void testReinitClientSettings() throws Exception { @@ -122,33 +136,33 @@ public void testReinitClientSettings() throws Exception { final GoogleCloudStorageService storageService = plugin.storageService; GoogleCloudStorageOperationsStats statsCollector = new GoogleCloudStorageOperationsStats("bucket"); final Storage client11 = storageService.client("gcs1", "repo1", statsCollector); - assertThat(client11.getOptions().getProjectId(), equalTo("project_gcs11")); + MatcherAssert.assertThat(client11.getOptions().getProjectId(), equalTo("project_gcs11")); final Storage client12 = storageService.client("gcs2", "repo2", statsCollector); - assertThat(client12.getOptions().getProjectId(), equalTo("project_gcs12")); + MatcherAssert.assertThat(client12.getOptions().getProjectId(), equalTo("project_gcs12")); // client 3 is missing final IllegalArgumentException e1 = expectThrows( IllegalArgumentException.class, () -> storageService.client("gcs3", "repo3", statsCollector) ); - assertThat(e1.getMessage(), containsString("Unknown client name [gcs3].")); + MatcherAssert.assertThat(e1.getMessage(), containsString("Unknown client name [gcs3].")); // update client settings plugin.reload(settings2); // old client 1 not changed - assertThat(client11.getOptions().getProjectId(), equalTo("project_gcs11")); + MatcherAssert.assertThat(client11.getOptions().getProjectId(), equalTo("project_gcs11")); // new client 1 is changed final Storage client21 = storageService.client("gcs1", "repo1", statsCollector); - assertThat(client21.getOptions().getProjectId(), equalTo("project_gcs21")); + MatcherAssert.assertThat(client21.getOptions().getProjectId(), equalTo("project_gcs21")); // old client 2 not changed - assertThat(client12.getOptions().getProjectId(), equalTo("project_gcs12")); + MatcherAssert.assertThat(client12.getOptions().getProjectId(), equalTo("project_gcs12")); // new client2 is gone final IllegalArgumentException e2 = expectThrows( IllegalArgumentException.class, () -> storageService.client("gcs2", "repo2", statsCollector) ); - assertThat(e2.getMessage(), containsString("Unknown client name [gcs2].")); + MatcherAssert.assertThat(e2.getMessage(), containsString("Unknown client name [gcs2].")); // client 3 emerged final Storage client23 = storageService.client("gcs3", "repo3", statsCollector); - assertThat(client23.getOptions().getProjectId(), equalTo("project_gcs23")); + MatcherAssert.assertThat(client23.getOptions().getProjectId(), equalTo("project_gcs23")); } } @@ -193,4 +207,72 @@ public void testToTimeout() { assertEquals(-1, GoogleCloudStorageService.toTimeout(TimeValue.ZERO).intValue()); assertEquals(0, GoogleCloudStorageService.toTimeout(TimeValue.MINUS_ONE).intValue()); } + + /** + * The following method test the Google Application Default Credential instead of + * using service account file. + * Considered use of JUnit Mocking due to static method GoogleCredentials.getApplicationDefault + * and avoiding environment variables to set which later use GCE. + * @throws Exception + */ + public void testApplicationDefaultCredential() throws Exception { + GoogleCloudStorageClientSettings settings = getGCSClientSettingsWithoutCredentials(); + GoogleCredentials mockGoogleCredentials = Mockito.mock(GoogleCredentials.class); + HttpTransportOptions mockHttpTransportOptions = Mockito.mock(HttpTransportOptions.class); + GoogleApplicationDefaultCredentials mockDefaultCredentials = Mockito.mock(GoogleApplicationDefaultCredentials.class); + Mockito.when(mockDefaultCredentials.get()).thenReturn(mockGoogleCredentials); + + GoogleCloudStorageService service = new GoogleCloudStorageService(mockDefaultCredentials); + StorageOptions storageOptions = service.createStorageOptions(settings, mockHttpTransportOptions); + assertNotNull(storageOptions); + assertEquals(storageOptions.getCredentials().toString(), mockGoogleCredentials.toString()); + } + + /** + * The application default credential throws exception when there are + * no Environment Variables provided or Google Compute Engine is not running + * @throws Exception + */ + public void testApplicationDefaultCredentialsWhenNoSettingProvided() throws Exception { + GoogleCloudStorageClientSettings settings = getGCSClientSettingsWithoutCredentials(); + HttpTransportOptions mockHttpTransportOptions = Mockito.mock(HttpTransportOptions.class); + GoogleCloudStorageService service = new GoogleCloudStorageService(); + StorageOptions storageOptions = service.createStorageOptions(settings, mockHttpTransportOptions); + + Exception exception = assertThrows(IOException.class, GoogleCredentials::getApplicationDefault); + assertNotNull(storageOptions); + assertNull(storageOptions.getCredentials()); + MatcherAssert.assertThat(exception.getMessage(), containsString("The Application Default Credentials are not available")); + } + + /** + * The application default credential throws IOException when it is + * used without GoogleCloudStorageService + */ + public void testDefaultCredentialsThrowsExceptionWithoutGCStorageService() { + GoogleApplicationDefaultCredentials googleApplicationDefaultCredentials = new GoogleApplicationDefaultCredentials(); + GoogleCredentials credentials = googleApplicationDefaultCredentials.get(); + assertNull(credentials); + Exception exception = assertThrows(IOException.class, GoogleCredentials::getApplicationDefault); + MatcherAssert.assertThat(exception.getMessage(), containsString("The Application Default Credentials are not available")); + } + + /** + * This is a helper method to provide GCS Client settings without credentials + * @return GoogleCloudStorageClientSettings + * @throws URISyntaxException + */ + private GoogleCloudStorageClientSettings getGCSClientSettingsWithoutCredentials() throws URISyntaxException { + return new GoogleCloudStorageClientSettings( + null, + endpoint, + projectIdName, + connectTimeValue, + readTimeValue, + applicationName, + new URI(""), + new ProxySettings(Proxy.Type.DIRECT, null, 0, null, null) + ); + } + } From 1d8bbd5d643e483a0089243d5c7d0f7e28ac8d16 Mon Sep 17 00:00:00 2001 From: Peter Nied Date: Tue, 16 Jan 2024 16:55:49 -0600 Subject: [PATCH 07/11] Add TRIAGING.md for triage agenda and processes (#11814) Signed-off-by: Peter Nied Signed-off-by: Peter Nied --- TRIAGING.md | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 TRIAGING.md diff --git a/TRIAGING.md b/TRIAGING.md new file mode 100644 index 0000000000000..47cb44a4f5ba2 --- /dev/null +++ b/TRIAGING.md @@ -0,0 +1,83 @@ + + +The maintainers of the OpenSearch Repo seek to promote an inclusive and engaged community of contributors. In order to facilitate this, weekly triage meetings are open-to-all and attendance is encouraged for anyone who hopes to contribute, discuss an issue, or learn more about the project. To learn more about contributing to the OpenSearch Repo visit the [Contributing](./CONTRIBUTING.md) documentation. + +### Do I need to attend for my issue to be addressed/triaged? + +Attendance is not required for your issue to be triaged or addressed. If not accepted the issue will be updated with a comment for next steps. All new issues are triaged weekly. + +You can track if your issue was triaged by watching your GitHub notifications for updates. + +### What happens if my issue does not get covered this time? + +Each meeting we seek to address all new issues. However, should we run out of time before your issue is discussed, you are always welcome to attend the next meeting or to follow up on the issue post itself. + +### How do I join the Triage meeting? + +Meetings are hosted regularly at 10:00a - 10:55a Central Time every Wednesday and can be joined via [Chime](https://aws.amazon.com/chime/), with this [meeting link](https://chime.aws/1988437365). + +After joining the Chime meeting, you can enable your video / voice to join the discussion. If you do not have a webcam or microphone available, you can still join in via the text chat. + +If you have an issue you'd like to bring forth please prepare a link to the issue so it can be presented and viewed by everyone in the meeting. + +### Is there an agenda for each week? + +Meetings are 55 minutes and follows this structure: + +Yes, each 55-minute meeting follows this structure: +1. **Initial Gathering:** Feel free to turn on your video and engage in informal conversation. Shortly, a volunteer triage [facilitator](#what-is-the-role-of-the-facilitator) will begin the meeting and share their screen. +2. **Record Attendees:** The facilitator will request attendees to share their GitHub profile links. These links will be collected and assembled into a [tag](#how-do-triage-facilitator-tag-comments-during-the-triage-meeting) to annotate comments during the meeting. +3. **Announcements:** Any announcements will be made at the beginning of the meeting. +4. **Review of New Issues:** We start by reviewing all untriaged [issues](https://github.com/search?q=label%3Auntriaged+is%3Aopen++repo%3Aopensearch-project%2FOpenSearch+&type=issues&ref=advsearch&s=created&o=desc) for the OpenSearch repo. +5. **Attendee Requests:** An opportunity for any meeting member to request consideration of an issue or pull request. +6. **Open Discussion:** Attendees can bring up any topics not already covered by filed issues or pull requests. + +### What is the role of the facilitator? + +The facilitator is crucial in driving the meeting, ensuring a smooth flow of issues into OpenSearch for future contributions. They maintain the meeting's agenda, solicit input from attendees, and record outcomes using the triage tag as items are discussed. + +### Do I need to have already contributed to the project to attend a triage meeting? + +No prior contributions are required. All interested individuals are welcome and encouraged to attend. Triage meetings offer a fantastic opportunity for new contributors to understand the project and explore various contribution avenues. + +### What if I have an issue that is almost a duplicate, should I open a new one to be triaged? + +You can always open an [issue](https://github.com/opensearch-project/OpenSearch/issues/new/choose) including one that you think may be a duplicate. If you believe your issue is similar but distinct from an existing one, you are encouraged to file it and explain the differences during the triage meeting. + +### What if I have follow-up questions on an issue? + +If you have an existing issue you would like to discuss, you can always comment on the issue itself. Alternatively, you are welcome to come to the triage meeting to discuss. + +### Is this meeting a good place to get help setting up features on my OpenSearch instance? + +While we are always happy to help the community, the best resource for implementation questions is [the OpenSearch forum](https://forum.opensearch.org/). + +There you can find answers to many common questions as well as speak with implementation experts. + +### What are the issue labels associated with triaging? + +Yes, there are several labels that are used to identify the 'state' of issues filed in OpenSearch . +| Label | When Applied | Meaning | +|---------------|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------| +| `Untriaged` | When issues are created or re-opened. | Issues labeled as 'Untriaged' require the attention of the repository maintainers and may need to be prioritized for quicker resolution. It's crucial to keep the count of 'Untriaged' labels low to ensure all potential security issues are addressed in a timely manner. See [SECURITY.md](https://github.com/opensearch-project/OpenSearch/blob/main/SECURITY.md) for more details on handling these issues. | +| `Help Wanted` | Anytime. | Issues marked as 'Help Wanted' signal that they are actionable and not the current focus of the project maintainers. Community contributions are especially encouraged for these issues. | +| `Good First Issue` | Anytime. | Issues labeled as 'Good First Issue' are small in scope and can be resolved with a single pull request. These are recommended starting points for newcomers looking to make their first contributions. | + +### What are the typical outcomes of a triaged issue? + +| Outcome | Label | Description | Canned Response | +|--------------|------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Accepted | `-untriaged` | The issue has the details needed to be directed towards area owners. | "Thanks for filing this issue, please feel free to submit a pull request." | +| Rejected | N/A | The issue will be closed with a reason for why it was rejected. Reasons might include lack of details, or being outside the scope of the project. | "Thanks for creating this issue; however, it isn't being accepted due to {REASON}. Please feel free to re-open after addressing the reason." | +| Area Triage | `+{AREALABEL}` | OpenSearch has many different areas. If it's unclear whether an issue should be accepted, it will be labeled with the area and an owner will be @mentioned for follow-up. | "Thanks for creating this issue; the triage meeting was unsure if this issue should be accepted, @{PERSON} or someone from the area please review and then accept or reject this issue?" | +| Transfer | N/A | If the issue applies to another repository within the OpenSearch Project, it will be transferred accordingly. | "@opensearch-project/triage, can you please transfer this issue to project {REPOSITORY}." Or, if someone at the meeting has permissions, they can start the transfer. | + +### Is this where I should bring up potential security vulnerabilities? + +Due to the sensitive nature of security vulnerabilities, please report all potential vulnerabilities directly by following the steps outlined on the [SECURITY.md](https://github.com/opensearch-project/OpenSearch/blob/main/SECURITY.md) document. + +### How do triage facilitator tag comments during the triage meeting? + +During the triage meeting, facilitators should use the tag _[Triage - attendees [1](#Profile_link) [2](#Profile_link)]_ to indicate a collective decision. This ensures contributors know the decision came from the meeting rather than an individual and identifies participants for any follow-up queries. + +This tag should not be used outside triage meetings. From 6d2d4ddc8b096e95b94b65142ed98437d96d8d29 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Wed, 17 Jan 2024 07:23:05 +0800 Subject: [PATCH 08/11] Add copy ingest processor (#11870) --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../ingest/common/CopyProcessor.java | 161 ++++++++ .../common/IngestCommonModulePlugin.java | 1 + .../common/CopyProcessorFactoryTests.java | 101 +++++ .../ingest/common/CopyProcessorTests.java | 125 ++++++ .../rest-api-spec/test/ingest/10_basic.yml | 17 + .../test/ingest/300_copy_processor.yml | 374 ++++++++++++++++++ .../org/opensearch/ingest/IngestDocument.java | 2 +- 8 files changed, 781 insertions(+), 1 deletion(-) create mode 100644 modules/ingest-common/src/main/java/org/opensearch/ingest/common/CopyProcessor.java create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorFactoryTests.java create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorTests.java create mode 100644 modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_copy_processor.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 14055f0d8462f..d1e970d097bfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -126,6 +126,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591)) - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) - Add match_only_text field that is optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) +- Add copy ingest processor ([#11870](https://github.com/opensearch-project/OpenSearch/pull/11870)) - Introduce new feature flag "WRITEABLE_REMOTE_INDEX" to gate the writeable remote index functionality ([#11717](https://github.com/opensearch-project/OpenSearch/pull/11170)) - Bump OpenTelemetry from 1.32.0 to 1.34.1 ([#11891](https://github.com/opensearch-project/OpenSearch/pull/11891)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/CopyProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/CopyProcessor.java new file mode 100644 index 0000000000000..dec69df275130 --- /dev/null +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/CopyProcessor.java @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.core.common.Strings; +import org.opensearch.ingest.AbstractProcessor; +import org.opensearch.ingest.ConfigurationUtils; +import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.Processor; +import org.opensearch.script.ScriptService; +import org.opensearch.script.TemplateScript; + +import java.util.Map; + +public final class CopyProcessor extends AbstractProcessor { + public static final String TYPE = "copy"; + + private final TemplateScript.Factory sourceField; + private final TemplateScript.Factory targetField; + + private final boolean ignoreMissing; + + private final boolean removeSource; + + private final boolean overrideTarget; + + CopyProcessor(String tag, String description, TemplateScript.Factory sourceField, TemplateScript.Factory targetField) { + this(tag, description, sourceField, targetField, false, false, false); + } + + CopyProcessor( + String tag, + String description, + TemplateScript.Factory sourceField, + TemplateScript.Factory targetField, + boolean ignoreMissing, + boolean removeSource, + boolean overrideTarget + ) { + super(tag, description); + this.sourceField = sourceField; + this.targetField = targetField; + this.ignoreMissing = ignoreMissing; + this.removeSource = removeSource; + this.overrideTarget = overrideTarget; + } + + public TemplateScript.Factory getSourceField() { + return sourceField; + } + + public TemplateScript.Factory getTargetField() { + return targetField; + } + + public boolean isIgnoreMissing() { + return ignoreMissing; + } + + public boolean isRemoveSource() { + return removeSource; + } + + public boolean isOverrideTarget() { + return overrideTarget; + } + + @Override + public IngestDocument execute(IngestDocument document) { + String source = document.renderTemplate(sourceField); + final boolean sourceFieldPathIsNullOrEmpty = Strings.isNullOrEmpty(source); + if (sourceFieldPathIsNullOrEmpty || document.hasField(source, true) == false) { + if (ignoreMissing) { + return document; + } else if (sourceFieldPathIsNullOrEmpty) { + throw new IllegalArgumentException("source field path cannot be null nor empty"); + } else { + throw new IllegalArgumentException("source field [" + source + "] doesn't exist"); + } + } + + String target = document.renderTemplate(targetField); + if (Strings.isNullOrEmpty(target)) { + throw new IllegalArgumentException("target field path cannot be null nor empty"); + } + if (source.equals(target)) { + throw new IllegalArgumentException("source field path and target field path cannot be same"); + } + + if (overrideTarget || document.hasField(target, true) == false || document.getFieldValue(target, Object.class) == null) { + Object sourceValue = document.getFieldValue(source, Object.class); + document.setFieldValue(target, IngestDocument.deepCopy(sourceValue)); + } else { + throw new IllegalArgumentException("target field [" + target + "] already exists"); + } + + if (removeSource) { + document.removeField(source); + } + + return document; + } + + @Override + public String getType() { + return TYPE; + } + + public static final class Factory implements Processor.Factory { + + private final ScriptService scriptService; + + public Factory(ScriptService scriptService) { + this.scriptService = scriptService; + } + + @Override + public CopyProcessor create( + Map registry, + String processorTag, + String description, + Map config + ) throws Exception { + String sourceField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "source_field"); + TemplateScript.Factory sourceFieldTemplate = ConfigurationUtils.compileTemplate( + TYPE, + processorTag, + "source_field", + sourceField, + scriptService + ); + String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field"); + TemplateScript.Factory targetFieldTemplate = ConfigurationUtils.compileTemplate( + TYPE, + processorTag, + "target_field", + targetField, + scriptService + ); + boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); + boolean removeSource = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "remove_source", false); + boolean overrideTarget = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "override_target", false); + + return new CopyProcessor( + processorTag, + description, + sourceFieldTemplate, + targetFieldTemplate, + ignoreMissing, + removeSource, + overrideTarget + ); + } + } +} diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java index a2a51d968e078..7c1b4841122b0 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java @@ -106,6 +106,7 @@ public Map getProcessors(Processor.Parameters paramet processors.put(DropProcessor.TYPE, new DropProcessor.Factory()); processors.put(HtmlStripProcessor.TYPE, new HtmlStripProcessor.Factory()); processors.put(CsvProcessor.TYPE, new CsvProcessor.Factory()); + processors.put(CopyProcessor.TYPE, new CopyProcessor.Factory(parameters.scriptService)); return Collections.unmodifiableMap(processors); } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorFactoryTests.java new file mode 100644 index 0000000000000..c1ca86a49e334 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorFactoryTests.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.OpenSearchException; +import org.opensearch.OpenSearchParseException; +import org.opensearch.ingest.TestTemplateService; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.equalTo; + +public class CopyProcessorFactoryTests extends OpenSearchTestCase { + + private CopyProcessor.Factory factory; + + @Before + public void init() { + factory = new CopyProcessor.Factory(TestTemplateService.instance()); + } + + public void testCreate() throws Exception { + boolean ignoreMissing = randomBoolean(); + boolean removeSource = randomBoolean(); + boolean overrideTarget = randomBoolean(); + Map config = new HashMap<>(); + config.put("source_field", "source"); + config.put("target_field", "target"); + config.put("ignore_missing", ignoreMissing); + config.put("remove_source", removeSource); + config.put("override_target", overrideTarget); + String processorTag = randomAlphaOfLength(10); + CopyProcessor copyProcessor = factory.create(null, processorTag, null, config); + assertThat(copyProcessor.getTag(), equalTo(processorTag)); + assertThat(copyProcessor.getSourceField().newInstance(Collections.emptyMap()).execute(), equalTo("source")); + assertThat(copyProcessor.getTargetField().newInstance(Collections.emptyMap()).execute(), equalTo("target")); + assertThat(copyProcessor.isIgnoreMissing(), equalTo(ignoreMissing)); + assertThat(copyProcessor.isRemoveSource(), equalTo(removeSource)); + assertThat(copyProcessor.isOverrideTarget(), equalTo(overrideTarget)); + } + + public void testCreateWithSourceField() throws Exception { + Map config = new HashMap<>(); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[source_field] required property is missing")); + } + + config.put("source_field", null); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[source_field] required property is missing")); + } + } + + public void testCreateWithTargetField() throws Exception { + Map config = new HashMap<>(); + config.put("source_field", "source"); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[target_field] required property is missing")); + } + + config.put("source_field", "source"); + config.put("target_field", null); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[target_field] required property is missing")); + } + } + + public void testInvalidMustacheTemplate() throws Exception { + CopyProcessor.Factory factory = new CopyProcessor.Factory(TestTemplateService.instance(true)); + Map config = new HashMap<>(); + config.put("source_field", "{{source}}"); + config.put("target_field", "target"); + String processorTag = randomAlphaOfLength(10); + OpenSearchException exception = expectThrows(OpenSearchException.class, () -> factory.create(null, processorTag, null, config)); + assertThat(exception.getMessage(), equalTo("java.lang.RuntimeException: could not compile script")); + assertThat(exception.getMetadata("opensearch.processor_tag").get(0), equalTo(processorTag)); + } + +} diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorTests.java new file mode 100644 index 0000000000000..f271bdd342d0b --- /dev/null +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/CopyProcessorTests.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.Processor; +import org.opensearch.ingest.RandomDocumentPicks; +import org.opensearch.ingest.TestTemplateService; +import org.opensearch.test.OpenSearchTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class CopyProcessorTests extends OpenSearchTestCase { + + public void testCopyExistingField() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String sourceFieldName = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); + String targetFieldName = RandomDocumentPicks.randomFieldName(random()); + Processor processor = createCopyProcessor(sourceFieldName, targetFieldName, false, false, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetFieldName), equalTo(true)); + Object sourceValue = ingestDocument.getFieldValue(sourceFieldName, Object.class); + assertThat(ingestDocument.getFieldValue(targetFieldName, Object.class), equalTo(sourceValue)); + assertThat(ingestDocument.getFieldValue(sourceFieldName, Object.class), equalTo(sourceValue)); + + Processor processorWithEmptyTarget = createCopyProcessor(sourceFieldName, "", false, false, false); + assertThrows( + "target field path cannot be null nor empty", + IllegalArgumentException.class, + () -> processorWithEmptyTarget.execute(ingestDocument) + ); + + Processor processorWithSameSourceAndTarget = createCopyProcessor(sourceFieldName, sourceFieldName, false, false, false); + assertThrows( + "source field path and target field path cannot be same", + IllegalArgumentException.class, + () -> processorWithSameSourceAndTarget.execute(ingestDocument) + ); + } + + public void testCopyWithIgnoreMissing() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String targetFieldName = RandomDocumentPicks.randomFieldName(random()); + Processor processor = createCopyProcessor("non-existing-field", targetFieldName, false, false, false); + assertThrows( + "source field [non-existing-field] doesn't exist", + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + + Processor processorWithEmptyFieldName = createCopyProcessor("", targetFieldName, false, false, false); + assertThrows( + "source field path cannot be null nor empty", + IllegalArgumentException.class, + () -> processorWithEmptyFieldName.execute(ingestDocument) + ); + + Processor processorWithIgnoreMissing = createCopyProcessor("non-existing-field", targetFieldName, true, false, false); + processorWithIgnoreMissing.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetFieldName), equalTo(false)); + } + + public void testCopyWithRemoveSource() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String sourceFieldName = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); + String targetFieldName = RandomDocumentPicks.randomFieldName(random()); + Object sourceValue = ingestDocument.getFieldValue(sourceFieldName, Object.class); + + Processor processor = createCopyProcessor(sourceFieldName, targetFieldName, false, true, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetFieldName), equalTo(true)); + assertThat(ingestDocument.getFieldValue(targetFieldName, Object.class), equalTo(sourceValue)); + assertThat(ingestDocument.hasField(sourceFieldName), equalTo(false)); + } + + public void testCopyToExistingField() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String targetFieldName = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); + Object sourceValue = RandomDocumentPicks.randomFieldValue(random()); + String sourceFieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, sourceValue); + + Processor processor = createCopyProcessor(sourceFieldName, targetFieldName, false, false, false); + assertThrows( + "target field [" + targetFieldName + "] already exists", + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + + // if override_target is false but target field's value is null, copy can execute successfully + String targetFieldWithNullValue = RandomDocumentPicks.addRandomField(random(), ingestDocument, null); + Processor processorWithTargetNullValue = createCopyProcessor(sourceFieldName, targetFieldWithNullValue, false, false, false); + processorWithTargetNullValue.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetFieldWithNullValue), equalTo(true)); + assertThat(ingestDocument.getFieldValue(targetFieldWithNullValue, Object.class), equalTo(sourceValue)); + + Processor processorWithOverrideTargetIsTrue = createCopyProcessor(sourceFieldName, targetFieldName, false, false, true); + processorWithOverrideTargetIsTrue.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetFieldName), equalTo(true)); + assertThat(ingestDocument.getFieldValue(targetFieldName, Object.class), equalTo(sourceValue)); + } + + private static Processor createCopyProcessor( + String sourceFieldName, + String targetFieldName, + boolean ignoreMissing, + boolean removeSource, + boolean overrideTarget + ) { + return new CopyProcessor( + randomAlphaOfLength(10), + null, + new TestTemplateService.MockTemplateScript.Factory(sourceFieldName), + new TestTemplateService.MockTemplateScript.Factory(targetFieldName), + ignoreMissing, + removeSource, + overrideTarget + ); + } +} diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml index f44cc1f9f9fcf..0719082c887f2 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml @@ -36,3 +36,20 @@ - contains: { nodes.$cluster_manager.ingest.processors: { type: split } } - contains: { nodes.$cluster_manager.ingest.processors: { type: trim } } - contains: { nodes.$cluster_manager.ingest.processors: { type: uppercase } } + +--- +"Copy processor exists": + - skip: + version: " - 2.11.99" + features: contains + reason: "copy processor was introduced in 2.12.0 and contains is a newly added assertion" + - do: + cluster.state: {} + + # Get cluster-manager node id + - set: { cluster_manager_node: cluster_manager } + + - do: + nodes.info: {} + + - contains: { nodes.$cluster_manager.ingest.processors: { type: copy } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_copy_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_copy_processor.yml new file mode 100644 index 0000000000000..0203b62ba67d6 --- /dev/null +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/300_copy_processor.yml @@ -0,0 +1,374 @@ +--- +teardown: + - do: + ingest.delete_pipeline: + id: "1" + ignore: 404 + +--- +"Test creat copy processor": + - skip: + version: " - 2.11.99" + reason: "introduced in 2.12" + - do: + catch: /\[target\_field\] required property is missing/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "source" + } + } + ] + } + - do: + catch: /\[source\_field\] required property is missing/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "target_field" : "target" + } + } + ] + } + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "source", + "target_field" : "target", + "ignore_missing" : true, + "remove_source" : true, + "override_target" : true + } + } + ] + } + - match: { acknowledged: true } + +--- +"Test copy processor with ignore_missing": + - skip: + version: " - 2.11.99" + reason: "introduced in 2.12" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "unknown_field", + "target_field" : "bar" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /source field \[unknown\_field\] doesn\'t exist/ + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "hello" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "unknown_field", + "target_field" : "bar", + "ignore_missing" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "hello" + } + - do: + get: + index: test + id: 1 + - match: { _source: { foo: "hello" } } + +--- +"Test copy processor with remove_source": + - skip: + version: " - 2.11.99" + reason: "introduced in 2.12" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "foo", + "target_field" : "bar" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "hello" + } + - do: + get: + index: test + id: 1 + - match: { _source: { foo: "hello", bar: "hello" } } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "foo", + "target_field" : "bar", + "remove_source" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "hello" + } + - do: + get: + index: test + id: 1 + - match: { _source: { bar: "hello" } } + +--- +"Test copy processor with override_target": + - skip: + version: " - 2.11.99" + reason: "introduced in 2.12" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "foo", + "target_field" : "bar" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /target field \[bar\] already exists/ + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "hello", + bar: "world" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "foo", + "target_field" : "bar", + "override_target" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "hello", + bar: "world" + } + - do: + get: + index: test + id: 1 + - match: { _source: { foo: "hello", bar: "hello" } } + +--- +"Test copy processor with template snippets": + - skip: + version: " - 2.11.99" + reason: "introduced in 2.12" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "{{source}}", + "target_field" : "{{target}}" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /source field path cannot be null nor empty/ + index: + index: test + id: 1 + pipeline: "1" + body: { + target: "bar", + foo: "hello", + bar: "world" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "{{source}}", + "target_field" : "{{target}}" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /target field path cannot be null nor empty/ + index: + index: test + id: 1 + pipeline: "1" + body: { + source: "foo", + foo: "hello", + bar: "world" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "{{source}}", + "target_field" : "{{target}}" + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /source field path and target field path cannot be same/ + index: + index: test + id: 1 + pipeline: "1" + body: { + source: "foo", + target: "foo", + foo: "hello", + bar: "world" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "copy" : { + "source_field" : "{{source}}", + "target_field" : "{{target}}", + "override_target" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + source: "foo", + target: "bar", + foo: "hello", + bar: "world" + } + - do: + get: + index: test + id: 1 + - match: { _source: { source: "foo", target: "bar", foo: "hello", bar: "hello" } } diff --git a/server/src/main/java/org/opensearch/ingest/IngestDocument.java b/server/src/main/java/org/opensearch/ingest/IngestDocument.java index 10e9e64db561e..d975b0014de1f 100644 --- a/server/src/main/java/org/opensearch/ingest/IngestDocument.java +++ b/server/src/main/java/org/opensearch/ingest/IngestDocument.java @@ -757,7 +757,7 @@ public static Map deepCopyMap(Map source) { return (Map) deepCopy(source); } - private static Object deepCopy(Object value) { + public static Object deepCopy(Object value) { if (value instanceof Map) { Map mapValue = (Map) value; Map copy = new HashMap<>(mapValue.size()); From 517f091dce6a268060772f4b5740b40ce9d6ee22 Mon Sep 17 00:00:00 2001 From: Ashish Date: Wed, 17 Jan 2024 10:14:43 +0530 Subject: [PATCH 09/11] Fix flaky RemoteStoreRefreshListenerTests test #11256 (#11803) Signed-off-by: Ashish Singh --- .../index/shard/RemoteStoreRefreshListenerTests.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index 811d6a722d0f6..555284e55a0fa 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -418,7 +418,7 @@ public void testTrackerData() throws Exception { RemoteStoreRefreshListener listener = tuple.v1(); RemoteStoreStatsTrackerFactory trackerFactory = tuple.v2(); RemoteSegmentTransferTracker tracker = trackerFactory.getRemoteSegmentTransferTracker(indexShard.shardId()); - assertNoLag(tracker); + assertBusy(() -> assertNoLag(tracker)); indexDocs(100, randomIntBetween(100, 200)); indexShard.refresh("test"); listener.afterRefresh(true); @@ -473,6 +473,14 @@ private Tuple mockIn new InternalEngineFactory() ); + RemoteSegmentTransferTracker tracker = indexShard.getRemoteStoreStatsTrackerFactory() + .getRemoteSegmentTransferTracker(indexShard.shardId()); + try { + assertBusy(() -> assertTrue(tracker.getTotalUploadsSucceeded() > 0)); + } catch (Exception e) { + assert false; + } + indexDocs(1, randomIntBetween(1, 100)); // Mock indexShard.store().directory() @@ -554,7 +562,6 @@ private Tuple mockIn RecoverySettings recoverySettings = mock(RecoverySettings.class); when(recoverySettings.getMinRemoteSegmentMetadataFiles()).thenReturn(10); when(shard.getRecoverySettings()).thenReturn(recoverySettings); - RemoteSegmentTransferTracker tracker = remoteStoreStatsTrackerFactory.getRemoteSegmentTransferTracker(indexShard.shardId()); RemoteStoreRefreshListener refreshListener = new RemoteStoreRefreshListener(shard, emptyCheckpointPublisher, tracker); refreshListener.afterRefresh(true); return Tuple.tuple(refreshListener, remoteStoreStatsTrackerFactory); From 5dd4b61e83d9bbbdfd7fd490de4e8336bcc8e4f8 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Wed, 17 Jan 2024 21:14:10 +0800 Subject: [PATCH 10/11] Remove ingest processor supports excluding fields (#10967) * Remove ingest processor supports field patterns and excluding fields Signed-off-by: Gao Binlong * Format some code Signed-off-by: Gao Binlong * Fix test failure Signed-off-by: Gao Binlong * Remove the code of field pattern Signed-off-by: Gao Binlong * Add skip version in rest test yml Signed-off-by: Gao Binlong * Make fields and exclude_fields mutually exclusive when constructing RemoveProcessor Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../ingest/common/RemoveProcessor.java | 165 +++++++++++++----- .../common/RemoveProcessorFactoryTests.java | 38 ++-- .../ingest/common/RemoveProcessorTests.java | 46 +++++ .../test/ingest/290_remove_processor.yml | 40 +++++ .../opensearch/ingest/ConfigurationUtils.java | 8 + 6 files changed, 244 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1e970d097bfb..d083406d63518 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -103,6 +103,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) - Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351)) - Add cluster state stats ([#10670](https://github.com/opensearch-project/OpenSearch/pull/10670)) +- Remove ingest processor supports excluding fields ([#10967](https://github.com/opensearch-project/OpenSearch/pull/10967)) - [Tiered caching] Enabling serialization for IndicesRequestCache key object ([#10275](https://github.com/opensearch-project/OpenSearch/pull/10275)) - [Tiered caching] Defining interfaces, listeners and extending IndicesRequestCache with Tiered cache support ([#10753](https://github.com/opensearch-project/OpenSearch/pull/10753)) - [Remote cluster state] Restore cluster state version during remote state auto restore ([#10853](https://github.com/opensearch-project/OpenSearch/pull/10853)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java index a48cfd87b78c3..d01dce02fca31 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java @@ -32,6 +32,7 @@ package org.opensearch.ingest.common; +import org.opensearch.common.Nullable; import org.opensearch.core.common.Strings; import org.opensearch.index.VersionType; import org.opensearch.ingest.AbstractProcessor; @@ -42,11 +43,15 @@ import org.opensearch.script.TemplateScript; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; +import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException; + /** * Processor that removes existing fields. Nothing happens if the field is not present. */ @@ -55,11 +60,28 @@ public final class RemoveProcessor extends AbstractProcessor { public static final String TYPE = "remove"; private final List fields; + private final List excludeFields; private final boolean ignoreMissing; - RemoveProcessor(String tag, String description, List fields, boolean ignoreMissing) { + RemoveProcessor( + String tag, + String description, + @Nullable List fields, + @Nullable List excludeFields, + boolean ignoreMissing + ) { super(tag, description); - this.fields = new ArrayList<>(fields); + if (fields == null && excludeFields == null || fields != null && excludeFields != null) { + throw new IllegalArgumentException("ether fields and excludeFields must be set"); + } + if (fields != null) { + this.fields = new ArrayList<>(fields); + this.excludeFields = null; + } else { + this.fields = null; + this.excludeFields = new ArrayList<>(excludeFields); + } + this.ignoreMissing = ignoreMissing; } @@ -67,42 +89,76 @@ public List getFields() { return fields; } + public List getExcludeFields() { + return excludeFields; + } + @Override public IngestDocument execute(IngestDocument document) { - fields.forEach(field -> { - String path = document.renderTemplate(field); - final boolean fieldPathIsNullOrEmpty = Strings.isNullOrEmpty(path); - if (fieldPathIsNullOrEmpty || document.hasField(path) == false) { - if (ignoreMissing) { - return; - } else if (fieldPathIsNullOrEmpty) { - throw new IllegalArgumentException("field path cannot be null nor empty"); - } else { - throw new IllegalArgumentException("field [" + path + "] doesn't exist"); + if (fields != null && !fields.isEmpty()) { + fields.forEach(field -> { + String path = document.renderTemplate(field); + final boolean fieldPathIsNullOrEmpty = Strings.isNullOrEmpty(path); + if (fieldPathIsNullOrEmpty || document.hasField(path) == false) { + if (ignoreMissing) { + return; + } else if (fieldPathIsNullOrEmpty) { + throw new IllegalArgumentException("field path cannot be null nor empty"); + } else { + throw new IllegalArgumentException("field [" + path + "] doesn't exist"); + } } - } - // cannot remove _index, _version and _version_type. - if (path.equals(IngestDocument.Metadata.INDEX.getFieldName()) - || path.equals(IngestDocument.Metadata.VERSION.getFieldName()) - || path.equals(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { - throw new IllegalArgumentException("cannot remove metadata field [" + path + "]"); - } - // removing _id is disallowed when there's an external version specified in the request - if (path.equals(IngestDocument.Metadata.ID.getFieldName()) - && document.hasField(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { - String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); - if (!Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { - Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class, true); - throw new IllegalArgumentException( - "cannot remove metadata field [_id] when specifying external version for the document, version: " - + version - + ", version_type: " - + versionType - ); + + // cannot remove _index, _version and _version_type. + if (path.equals(IngestDocument.Metadata.INDEX.getFieldName()) + || path.equals(IngestDocument.Metadata.VERSION.getFieldName()) + || path.equals(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { + throw new IllegalArgumentException("cannot remove metadata field [" + path + "]"); } + // removing _id is disallowed when there's an external version specified in the request + if (path.equals(IngestDocument.Metadata.ID.getFieldName()) + && document.hasField(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { + String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); + if (!Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { + Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class, true); + throw new IllegalArgumentException( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + version + + ", version_type: " + + versionType + ); + } + } + document.removeField(path); + }); + } + + if (excludeFields != null && !excludeFields.isEmpty()) { + Set excludeFieldSet = new HashSet<>(); + excludeFields.forEach(field -> { + String path = document.renderTemplate(field); + // ignore the empty or null field path + if (!Strings.isNullOrEmpty(path)) { + excludeFieldSet.add(path); + } + }); + + if (!excludeFieldSet.isEmpty()) { + Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); + Set metadataFields = document.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toSet()); + existingFields.forEach(field -> { + // ignore metadata fields such as _index, _id, etc. + if (!metadataFields.contains(field) && !excludeFieldSet.contains(field)) { + document.removeField(field); + } + }); } - document.removeField(path); - }); + } + return document; } @@ -127,20 +183,41 @@ public RemoveProcessor create( Map config ) throws Exception { final List fields = new ArrayList<>(); - final Object field = ConfigurationUtils.readObject(TYPE, processorTag, config, "field"); - if (field instanceof List) { - @SuppressWarnings("unchecked") - List stringList = (List) field; - fields.addAll(stringList); - } else { - fields.add((String) field); + final List excludeFields = new ArrayList<>(); + final Object field = ConfigurationUtils.readOptionalObject(config, "field"); + final Object excludeField = ConfigurationUtils.readOptionalObject(config, "exclude_field"); + + if (field == null && excludeField == null || field != null && excludeField != null) { + throw newConfigurationException(TYPE, processorTag, "field", "ether field or exclude_field must be set"); } - final List compiledTemplates = fields.stream() - .map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "field", f, scriptService)) - .collect(Collectors.toList()); boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); - return new RemoveProcessor(processorTag, description, compiledTemplates, ignoreMissing); + + if (field != null) { + if (field instanceof List) { + @SuppressWarnings("unchecked") + List stringList = (List) field; + fields.addAll(stringList); + } else { + fields.add((String) field); + } + List fieldCompiledTemplates = fields.stream() + .map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "field", f, scriptService)) + .collect(Collectors.toList()); + return new RemoveProcessor(processorTag, description, fieldCompiledTemplates, null, ignoreMissing); + } else { + if (excludeField instanceof List) { + @SuppressWarnings("unchecked") + List stringList = (List) excludeField; + excludeFields.addAll(stringList); + } else { + excludeFields.add((String) excludeField); + } + List excludeFieldCompiledTemplates = excludeFields.stream() + .map(f -> ConfigurationUtils.compileTemplate(TYPE, processorTag, "exclude_field", f, scriptService)) + .collect(Collectors.toList()); + return new RemoveProcessor(processorTag, description, null, excludeFieldCompiledTemplates, ignoreMissing); + } } } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java index 66ca888a0d39f..179aef2feac0c 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorFactoryTests.java @@ -41,6 +41,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -79,16 +80,6 @@ public void testCreateMultipleFields() throws Exception { ); } - public void testCreateMissingField() throws Exception { - Map config = new HashMap<>(); - try { - factory.create(null, null, null, config); - fail("factory create should have failed"); - } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[field] required property is missing")); - } - } - public void testInvalidMustacheTemplate() throws Exception { RemoveProcessor.Factory factory = new RemoveProcessor.Factory(TestTemplateService.instance(true)); Map config = new HashMap<>(); @@ -98,4 +89,31 @@ public void testInvalidMustacheTemplate() throws Exception { assertThat(exception.getMessage(), equalTo("java.lang.RuntimeException: could not compile script")); assertThat(exception.getMetadata("opensearch.processor_tag").get(0), equalTo(processorTag)); } + + public void testCreateWithExcludeField() throws Exception { + Map config = new HashMap<>(); + String processorTag = randomAlphaOfLength(10); + OpenSearchException exception = expectThrows( + OpenSearchParseException.class, + () -> factory.create(null, processorTag, null, config) + ); + assertThat(exception.getMessage(), equalTo("[field] ether field or exclude_field must be set")); + + Map config2 = new HashMap<>(); + config2.put("field", "field1"); + config2.put("exclude_field", "field2"); + exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config2)); + assertThat(exception.getMessage(), equalTo("[field] ether field or exclude_field must be set")); + + Map config6 = new HashMap<>(); + config6.put("exclude_field", "exclude_field"); + RemoveProcessor removeProcessor = factory.create(null, processorTag, null, config6); + assertThat( + removeProcessor.getExcludeFields() + .stream() + .map(template -> template.newInstance(Collections.emptyMap()).execute()) + .collect(Collectors.toList()), + equalTo(List.of("exclude_field")) + ); + } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java index c138ad606d2e5..78a3d36124d45 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java @@ -38,8 +38,10 @@ import org.opensearch.ingest.Processor; import org.opensearch.ingest.RandomDocumentPicks; import org.opensearch.ingest.TestTemplateService; +import org.opensearch.script.TemplateScript; import org.opensearch.test.OpenSearchTestCase; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -57,12 +59,28 @@ public void testRemoveFields() throws Exception { randomAlphaOfLength(10), null, Collections.singletonList(new TestTemplateService.MockTemplateScript.Factory(field)), + null, false ); processor.execute(ingestDocument); assertThat(ingestDocument.hasField(field), equalTo(false)); } + public void testRemoveByExcludeFields() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + ingestDocument.setFieldValue("foo_1", "value"); + ingestDocument.setFieldValue("foo_2", "value"); + ingestDocument.setFieldValue("foo_3", "value"); + List excludeFields = new ArrayList<>(); + excludeFields.add(new TestTemplateService.MockTemplateScript.Factory("foo_1")); + excludeFields.add(new TestTemplateService.MockTemplateScript.Factory("foo_2")); + Processor processor = new RemoveProcessor(randomAlphaOfLength(10), null, null, excludeFields, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField("foo_1"), equalTo(true)); + assertThat(ingestDocument.hasField("foo_2"), equalTo(true)); + assertThat(ingestDocument.hasField("foo_3"), equalTo(false)); + } + public void testRemoveNonExistingField() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); String fieldName = RandomDocumentPicks.randomFieldName(random()); @@ -183,6 +201,34 @@ public void testRemoveMetadataField() throws Exception { } } + public void testCreateRemoveProcessorWithBothFieldsAndExcludeFields() throws Exception { + assertThrows( + "ether fields and excludeFields must be set", + IllegalArgumentException.class, + () -> new RemoveProcessor(randomAlphaOfLength(10), null, null, null, false) + ); + + final List fields; + if (randomBoolean()) { + fields = new ArrayList<>(); + } else { + fields = List.of(new TestTemplateService.MockTemplateScript.Factory("foo_1")); + } + + final List excludeFields; + if (randomBoolean()) { + excludeFields = new ArrayList<>(); + } else { + excludeFields = List.of(new TestTemplateService.MockTemplateScript.Factory("foo_2")); + } + + assertThrows( + "ether fields and excludeFields must be set", + IllegalArgumentException.class, + () -> new RemoveProcessor(randomAlphaOfLength(10), null, fields, excludeFields, false) + ); + } + public void testRemoveDocumentId() throws Exception { Map config = new HashMap<>(); config.put("field", IngestDocument.Metadata.ID.getFieldName()); diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml index 6668b468f8edc..e120a865052b0 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml @@ -319,3 +319,43 @@ teardown: } - match: { docs.0.error.type: "illegal_argument_exception" } - match: { docs.0.error.reason: "cannot remove metadata field [_id] when specifying external version for the document, version: 1, version_type: external_gte" } + +# Related issue: https://github.com/opensearch-project/OpenSearch/issues/1578 +--- +"Test remove processor with exclude_field": + - skip: + version: " - 2.11.99" + reason: "exclude_field is introduced in 2.12" + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "exclude_field": "bar" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: { + foo1: "bar", + foo2: "bar", + bar: "zoo", + zoo: "bar" + } + + - do: + get: + index: test + id: 1 + - match: { _source: { bar: "zoo"}} diff --git a/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java b/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java index 5185b740d90cb..a2c2137130587 100644 --- a/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java +++ b/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java @@ -387,6 +387,7 @@ private static Map readMap(String processorType, String processor /** * Returns and removes the specified property as an {@link Object} from the specified configuration map. + * If the property is missing an {@link OpenSearchParseException} is thrown */ public static Object readObject(String processorType, String processorTag, Map configuration, String propertyName) { Object value = configuration.remove(propertyName); @@ -396,6 +397,13 @@ public static Object readObject(String processorType, String processorTag, Map configuration, String propertyName) { + return configuration.remove(propertyName); + } + public static OpenSearchException newConfigurationException( String processorType, String processorTag, From d5a6f99d9c8c3e34628e4807d351001647f235c5 Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Wed, 17 Jan 2024 09:29:52 -0800 Subject: [PATCH 11/11] Apply the fast filter optimization to composite aggregation (#11505) We can do efficient DateHistogram aggregation under composite aggregations. --------- Signed-off-by: bowenlan-amzn --- CHANGELOG.md | 1 + .../bucket/FastFilterRewriteHelper.java | 385 ++++++++++++++++++ .../bucket/composite/CompositeAggregator.java | 98 ++++- .../bucket/composite/CompositeKey.java | 6 +- .../CompositeValuesCollectorQueue.java | 30 +- .../CompositeValuesSourceConfig.java | 2 +- .../DateHistogramValuesSourceBuilder.java | 2 +- .../bucket/composite/InternalComposite.java | 6 +- .../composite/PointsSortedDocsProducer.java | 6 +- .../composite/RoundingValuesSource.java | 24 +- .../AutoDateHistogramAggregator.java | 67 ++- .../histogram/DateHistogramAggregator.java | 60 ++- .../bucket/histogram/FilterRewriteHelper.java | 259 ------------ .../composite/CompositeAggregatorTests.java | 17 +- 14 files changed, 585 insertions(+), 378 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java delete mode 100644 server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/FilterRewriteHelper.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d083406d63518..ea96035807276 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -188,6 +188,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023)) - Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083)) +- Apply the fast filter optimization to composite aggregation of date histogram source ([#11505](https://github.com/opensearch-project/OpenSearch/pull/11083)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528)) - Improved performance of numeric exact-match queries ([#11209](https://github.com/opensearch-project/OpenSearch/pull/11209)) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java new file mode 100644 index 0000000000000..f377287d0b3bd --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/FastFilterRewriteHelper.java @@ -0,0 +1,385 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.bucket; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.CheckedFunction; +import org.opensearch.common.Rounding; +import org.opensearch.common.lucene.search.function.FunctionScoreQuery; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.query.DateRangeIncludingNowQuery; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.aggregations.bucket.composite.CompositeKey; +import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceConfig; +import org.opensearch.search.aggregations.bucket.composite.RoundingValuesSource; +import org.opensearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.OptionalLong; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.function.Supplier; + +/** + * Utility class to help rewrite aggregations into filters. + * Instead of aggregation collects documents one by one, filter may count all documents that match in one pass. + *

+ * Currently supported rewrite: + *

    + *
  • date histogram : date range filter. + * Applied: DateHistogramAggregator, AutoDateHistogramAggregator, CompositeAggregator
  • + *
+ * + * @opensearch.internal + */ +public final class FastFilterRewriteHelper { + + private FastFilterRewriteHelper() {} + + private static final int MAX_NUM_FILTER_BUCKETS = 1024; + private static final Map, Function> queryWrappers; + + // Initialize the wrapper map for unwrapping the query + static { + queryWrappers = new HashMap<>(); + queryWrappers.put(ConstantScoreQuery.class, q -> ((ConstantScoreQuery) q).getQuery()); + queryWrappers.put(FunctionScoreQuery.class, q -> ((FunctionScoreQuery) q).getSubQuery()); + queryWrappers.put(DateRangeIncludingNowQuery.class, q -> ((DateRangeIncludingNowQuery) q).getQuery()); + queryWrappers.put(IndexOrDocValuesQuery.class, q -> ((IndexOrDocValuesQuery) q).getIndexQuery()); + } + + /** + * Recursively unwraps query into the concrete form + * for applying the optimization + */ + private static Query unwrapIntoConcreteQuery(Query query) { + while (queryWrappers.containsKey(query.getClass())) { + query = queryWrappers.get(query.getClass()).apply(query); + } + + return query; + } + + /** + * Finds the min and max bounds of field values for the shard + */ + private static long[] getIndexBounds(final SearchContext context, final String fieldName) throws IOException { + final List leaves = context.searcher().getIndexReader().leaves(); + long min = Long.MAX_VALUE, max = Long.MIN_VALUE; + // Since the query does not specify bounds for aggregation, we can + // build the global min/max from local min/max within each segment + for (LeafReaderContext leaf : leaves) { + final PointValues values = leaf.reader().getPointValues(fieldName); + if (values != null) { + min = Math.min(min, NumericUtils.sortableBytesToLong(values.getMinPackedValue(), 0)); + max = Math.max(max, NumericUtils.sortableBytesToLong(values.getMaxPackedValue(), 0)); + } + } + + if (min == Long.MAX_VALUE || max == Long.MIN_VALUE) return null; + + return new long[] { min, max }; + } + + /** + * This method also acts as a pre-condition check for the optimization, + * returns null if the optimization cannot be applied + */ + public static long[] getAggregationBounds(final SearchContext context, final String fieldName) throws IOException { + final Query cq = unwrapIntoConcreteQuery(context.query()); + final long[] indexBounds = getIndexBounds(context, fieldName); + if (cq instanceof PointRangeQuery) { + final PointRangeQuery prq = (PointRangeQuery) cq; + // Ensure that the query and aggregation are on the same field + if (prq.getField().equals(fieldName)) { + return new long[] { + // Minimum bound for aggregation is the max between query and global + Math.max(NumericUtils.sortableBytesToLong(prq.getLowerPoint(), 0), indexBounds[0]), + // Maximum bound for aggregation is the min between query and global + Math.min(NumericUtils.sortableBytesToLong(prq.getUpperPoint(), 0), indexBounds[1]) }; + } + } else if (cq instanceof MatchAllDocsQuery) { + return indexBounds; + } + // Check if the top-level query (which may be a PRQ on another field) is functionally match-all + Weight weight = context.searcher().createWeight(context.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); + for (LeafReaderContext ctx : context.searcher().getIndexReader().leaves()) { + if (weight.count(ctx) != ctx.reader().numDocs()) { + return null; + } + } + return indexBounds; + } + + /** + * Creates the date range filters for aggregations using the interval, min/max + * bounds and the rounding values + */ + private static Weight[] createFilterForAggregations( + final SearchContext context, + final long interval, + final Rounding.Prepared preparedRounding, + final String field, + final DateFieldMapper.DateFieldType fieldType, + long low, + final long high + ) throws IOException { + // Calculate the number of buckets using range and interval + long roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); + long prevRounded = roundedLow; + int bucketCount = 0; + while (roundedLow <= fieldType.convertNanosToMillis(high)) { + bucketCount++; + if (bucketCount > MAX_NUM_FILTER_BUCKETS) return null; + // Below rounding is needed as the interval could return in + // non-rounded values for something like calendar month + roundedLow = preparedRounding.round(roundedLow + interval); + if (prevRounded == roundedLow) break; // prevents getting into an infinite loop + prevRounded = roundedLow; + } + + Weight[] filters = null; + if (bucketCount > 0) { + filters = new Weight[bucketCount]; + roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); + + int i = 0; + while (i < bucketCount) { + // Calculate the lower bucket bound + final byte[] lower = new byte[8]; + NumericUtils.longToSortableBytes(i == 0 ? low : fieldType.convertRoundedMillisToNanos(roundedLow), lower, 0); + + // Calculate the upper bucket bound + roundedLow = preparedRounding.round(roundedLow + interval); + final byte[] upper = new byte[8]; + NumericUtils.longToSortableBytes(i + 1 == bucketCount ? high : + // Subtract -1 if the minimum is roundedLow as roundedLow itself + // is included in the next bucket + fieldType.convertRoundedMillisToNanos(roundedLow) - 1, upper, 0); + + filters[i++] = context.searcher().createWeight(new PointRangeQuery(field, lower, upper, 1) { + @Override + protected String toString(int dimension, byte[] value) { + return null; + } + }, ScoreMode.COMPLETE_NO_SCORES, 1); + } + } + + return filters; + } + + /** + * Context object to do fast filter optimization + */ + public static class FastFilterContext { + private Weight[] filters = null; + public AggregationType aggregationType; + + public FastFilterContext() {} + + private void setFilters(Weight[] filters) { + this.filters = filters; + } + + public void setAggregationType(AggregationType aggregationType) { + this.aggregationType = aggregationType; + } + + public boolean isRewriteable(final Object parent, final int subAggLength) { + return aggregationType.isRewriteable(parent, subAggLength); + } + + /** + * This filter build method is for date histogram aggregation type + * + * @param computeBounds get the lower and upper bound of the field in a shard search + * @param roundingFunction produce Rounding that contains interval of date range. + * Rounding is computed dynamically using the bounds in AutoDateHistogram + * @param preparedRoundingSupplier produce PreparedRounding to round values at call-time + */ + public void buildFastFilter( + SearchContext context, + CheckedFunction computeBounds, + Function roundingFunction, + Supplier preparedRoundingSupplier + ) throws IOException { + assert this.aggregationType instanceof DateHistogramAggregationType; + DateHistogramAggregationType aggregationType = (DateHistogramAggregationType) this.aggregationType; + DateFieldMapper.DateFieldType fieldType = aggregationType.getFieldType(); + final long[] bounds = computeBounds.apply(aggregationType); + if (bounds == null) return; + + final Rounding rounding = roundingFunction.apply(bounds); + final OptionalLong intervalOpt = Rounding.getInterval(rounding); + if (intervalOpt.isEmpty()) return; + final long interval = intervalOpt.getAsLong(); + + // afterKey is the last bucket key in previous response, while the bucket key + // is the start of the bucket values, so add the interval + if (aggregationType instanceof CompositeAggregationType && ((CompositeAggregationType) aggregationType).afterKey != -1) { + bounds[0] = ((CompositeAggregationType) aggregationType).afterKey + interval; + } + + final Weight[] filters = FastFilterRewriteHelper.createFilterForAggregations( + context, + interval, + preparedRoundingSupplier.get(), + fieldType.name(), + fieldType, + bounds[0], + bounds[1] + ); + this.setFilters(filters); + } + } + + /** + * Different types have different pre-conditions, filter building logic, etc. + */ + public interface AggregationType { + boolean isRewriteable(Object parent, int subAggLength); + } + + /** + * For date histogram aggregation + */ + public static class DateHistogramAggregationType implements AggregationType { + private final MappedFieldType fieldType; + private final boolean missing; + private final boolean hasScript; + + public DateHistogramAggregationType(MappedFieldType fieldType, boolean missing, boolean hasScript) { + this.fieldType = fieldType; + this.missing = missing; + this.hasScript = hasScript; + } + + @Override + public boolean isRewriteable(Object parent, int subAggLength) { + if (parent == null && subAggLength == 0 && !missing && !hasScript) { + return fieldType != null && fieldType instanceof DateFieldMapper.DateFieldType; + } + return false; + } + + public DateFieldMapper.DateFieldType getFieldType() { + assert fieldType instanceof DateFieldMapper.DateFieldType; + return (DateFieldMapper.DateFieldType) fieldType; + } + } + + /** + * For composite aggregation with date histogram as a source + */ + public static class CompositeAggregationType extends DateHistogramAggregationType { + private final RoundingValuesSource valuesSource; + private long afterKey = -1L; + private final int size; + + public CompositeAggregationType( + CompositeValuesSourceConfig[] sourceConfigs, + CompositeKey rawAfterKey, + List formats, + int size + ) { + super(sourceConfigs[0].fieldType(), sourceConfigs[0].missingBucket(), sourceConfigs[0].hasScript()); + this.valuesSource = (RoundingValuesSource) sourceConfigs[0].valuesSource(); + this.size = size; + if (rawAfterKey != null) { + assert rawAfterKey.size() == 1 && formats.size() == 1; + this.afterKey = formats.get(0).parseLong(rawAfterKey.get(0).toString(), false, () -> { + throw new IllegalArgumentException("now() is not supported in [after] key"); + }); + } + } + + public Rounding getRounding() { + return valuesSource.getRounding(); + } + + public Rounding.Prepared getRoundingPreparer() { + return valuesSource.getPreparedRounding(); + } + } + + public static boolean isCompositeAggRewriteable(CompositeValuesSourceConfig[] sourceConfigs) { + return sourceConfigs.length == 1 && sourceConfigs[0].valuesSource() instanceof RoundingValuesSource; + } + + public static long getBucketOrd(long bucketOrd) { + if (bucketOrd < 0) { // already seen + bucketOrd = -1 - bucketOrd; + } + + return bucketOrd; + } + + /** + * This is executed for each segment by passing the leaf reader context + * + * @param incrementDocCount takes in the bucket key value and the bucket count + */ + public static boolean tryFastFilterAggregation( + final LeafReaderContext ctx, + FastFilterContext fastFilterContext, + final BiConsumer incrementDocCount + ) throws IOException { + if (fastFilterContext == null) return false; + if (fastFilterContext.filters == null) return false; + + final Weight[] filters = fastFilterContext.filters; + final int[] counts = new int[filters.length]; + int i; + for (i = 0; i < filters.length; i++) { + counts[i] = filters[i].count(ctx); + if (counts[i] == -1) { + // Cannot use the optimization if any of the counts + // is -1 indicating the segment might have deleted documents + return false; + } + } + + int s = 0; + int size = Integer.MAX_VALUE; + for (i = 0; i < filters.length; i++) { + if (counts[i] > 0) { + long bucketKey = i; // the index of filters is the key for filters aggregation + if (fastFilterContext.aggregationType instanceof DateHistogramAggregationType) { + final DateFieldMapper.DateFieldType fieldType = ((DateHistogramAggregationType) fastFilterContext.aggregationType) + .getFieldType(); + bucketKey = fieldType.convertNanosToMillis( + NumericUtils.sortableBytesToLong(((PointRangeQuery) filters[i].getQuery()).getLowerPoint(), 0) + ); + if (fastFilterContext.aggregationType instanceof CompositeAggregationType) { + size = ((CompositeAggregationType) fastFilterContext.aggregationType).size; + } + } + incrementDocCount.accept(bucketKey, counts[i]); + s++; + if (s > size) return true; + } + } + + return true; + } +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index 317c2a357bac5..822b8a6c4b118 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -56,7 +56,9 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.search.comparators.LongComparator; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.RoaringDocIdSet; +import org.opensearch.common.Rounding; import org.opensearch.common.lease.Releasables; import org.opensearch.index.IndexSortConfig; import org.opensearch.lucene.queries.SearchAfterSortedDocQuery; @@ -71,7 +73,9 @@ import org.opensearch.search.aggregations.MultiBucketCollector; import org.opensearch.search.aggregations.MultiBucketConsumerService; import org.opensearch.search.aggregations.bucket.BucketsAggregator; +import org.opensearch.search.aggregations.bucket.FastFilterRewriteHelper; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.opensearch.search.internal.SearchContext; import org.opensearch.search.searchafter.SearchAfterBuilder; import org.opensearch.search.sort.SortAndFormats; @@ -80,6 +84,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.LongUnaryOperator; @@ -111,6 +116,10 @@ final class CompositeAggregator extends BucketsAggregator { private boolean earlyTerminated; + private final FastFilterRewriteHelper.FastFilterContext fastFilterContext; + private LongKeyedBucketOrds bucketOrds = null; + private Rounding.Prepared preparedRounding = null; + CompositeAggregator( String name, AggregatorFactories factories, @@ -154,12 +163,33 @@ final class CompositeAggregator extends BucketsAggregator { } this.queue = new CompositeValuesCollectorQueue(context.bigArrays(), sources, size, rawAfterKey); this.rawAfterKey = rawAfterKey; + + fastFilterContext = new FastFilterRewriteHelper.FastFilterContext(); + if (!FastFilterRewriteHelper.isCompositeAggRewriteable(sourceConfigs)) return; + fastFilterContext.setAggregationType( + new FastFilterRewriteHelper.CompositeAggregationType(sourceConfigs, rawAfterKey, formats, size) + ); + if (fastFilterContext.isRewriteable(parent, subAggregators.length)) { + // bucketOrds is the data structure for saving date histogram results + bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), CardinalityUpperBound.ONE); + // Currently the filter rewrite is only supported for date histograms + FastFilterRewriteHelper.CompositeAggregationType aggregationType = + (FastFilterRewriteHelper.CompositeAggregationType) fastFilterContext.aggregationType; + preparedRounding = aggregationType.getRoundingPreparer(); + fastFilterContext.buildFastFilter( + context, + fc -> FastFilterRewriteHelper.getAggregationBounds(context, fc.getFieldType().name()), + x -> aggregationType.getRounding(), + () -> preparedRounding + ); + } } @Override protected void doClose() { try { Releasables.close(queue); + Releasables.close(bucketOrds); } finally { Releasables.close(sources); } @@ -187,12 +217,14 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I } int num = Math.min(size, queue.size()); - final InternalComposite.InternalBucket[] buckets = new InternalComposite.InternalBucket[num]; + InternalComposite.InternalBucket[] buckets = new InternalComposite.InternalBucket[num]; + long[] bucketOrdsToCollect = new long[queue.size()]; for (int i = 0; i < queue.size(); i++) { bucketOrdsToCollect[i] = i; } InternalAggregations[] subAggsForBuckets = buildSubAggsForBuckets(bucketOrdsToCollect); + while (queue.size() > 0) { int slot = queue.pop(); CompositeKey key = queue.toCompositeKey(slot); @@ -208,6 +240,43 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I aggs ); } + + // Build results from fast filters optimization + if (bucketOrds != null) { + // CompositeKey is the value of bucket key + final Map bucketMap = new HashMap<>(); + // Some segments may not be optimized, so buckets may contain results from the queue. + for (InternalComposite.InternalBucket internalBucket : buckets) { + bucketMap.put(internalBucket.getRawKey(), internalBucket); + } + // Loop over the buckets in the bucketOrds, and populate the map accordingly + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(0); + while (ordsEnum.next()) { + Long bucketKeyValue = ordsEnum.value(); + CompositeKey key = new CompositeKey(bucketKeyValue); + if (bucketMap.containsKey(key)) { + long docCount = bucketDocCount(ordsEnum.ord()) + bucketMap.get(key).getDocCount(); + bucketMap.get(key).setDocCount(docCount); + } else { + InternalComposite.InternalBucket bucket = new InternalComposite.InternalBucket( + sourceNames, + formats, + key, + reverseMuls, + missingOrders, + bucketDocCount(ordsEnum.ord()), + buildEmptySubAggregations() + ); + bucketMap.put(key, bucket); + } + } + // since a map is not sorted structure, sort it before transform back to buckets + List bucketList = new ArrayList<>(bucketMap.values()); + CollectionUtil.introSort(bucketList, InternalComposite.InternalBucket::compareKey); + buckets = bucketList.subList(0, Math.min(size, bucketList.size())).toArray(InternalComposite.InternalBucket[]::new); + num = buckets.length; + } + CompositeKey lastBucket = num > 0 ? buckets[num - 1].getRawKey() : null; return new InternalAggregation[] { new InternalComposite( @@ -296,7 +365,7 @@ private Sort buildIndexSortPrefix(LeafReaderContext context) throws IOException if (indexSortField.getReverse() != (source.reverseMul == -1)) { if (i == 0) { - // the leading index sort matches the leading source field but the order is reversed + // the leading index sort matches the leading source field, but the order is reversed, // so we don't check the other sources. return new Sort(indexSortField); } @@ -304,8 +373,8 @@ private Sort buildIndexSortPrefix(LeafReaderContext context) throws IOException } sortFields.add(indexSortField); if (sourceConfig.valuesSource() instanceof RoundingValuesSource) { - // the rounding "squashes" many values together, that breaks the ordering of sub-values - // so we ignore subsequent source even if they match the index sort. + // the rounding "squashes" many values together, that breaks the ordering of sub-values, + // so we ignore the subsequent sources even if they match the index sort. break; } } @@ -448,6 +517,16 @@ private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) t @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + boolean optimized = FastFilterRewriteHelper.tryFastFilterAggregation( + ctx, + fastFilterContext, + (key, count) -> incrementBucketDocCount( + FastFilterRewriteHelper.getBucketOrd(bucketOrds.add(0, preparedRounding.round(key))), + count + ) + ); + if (optimized) throw new CollectionTerminatedException(); + finishLeaf(); boolean fillDocIdSet = deferredCollectors != NO_OP_COLLECTOR; @@ -477,9 +556,10 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket docIdSetBuilder = new RoaringDocIdSet.Builder(ctx.reader().maxDoc()); } if (rawAfterKey != null && sortPrefixLen > 0) { - // We have an after key and index sort is applicable so we jump directly to the doc - // that is after the index sort prefix using the rawAfterKey and we start collecting - // document from there. + // We have an after key and index sort is applicable, so we jump directly to the doc + // after the index sort prefix using the rawAfterKey and we start collecting + // documents from there. + assert indexSortPrefix != null; processLeafFromQuery(ctx, indexSortPrefix); throw new CollectionTerminatedException(); } else { @@ -507,6 +587,8 @@ public void collect(int doc, long bucket) throws IOException { try { long docCount = docCountProvider.getDocCount(doc); if (queue.addIfCompetitive(indexSortPrefix, docCount)) { + // one doc may contain multiple values, we iterate over and collect one by one + // so the same doc can appear multiple times here if (builder != null && lastDoc != doc) { builder.add(doc); lastDoc = doc; @@ -569,7 +651,7 @@ private LeafBucketCollector getSecondPassCollector(LeafBucketCollector subCollec @Override public void collect(int doc, long zeroBucket) throws IOException { assert zeroBucket == 0; - Integer slot = queue.compareCurrent(); + Integer slot = queue.getCurrentSlot(); if (slot != null) { // The candidate key is a top bucket. // We can defer the collection of this document/bucket to the sub collector diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeKey.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeKey.java index 5ddeb22d33a6f..338ebdc66eef7 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeKey.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeKey.java @@ -44,7 +44,7 @@ * * @opensearch.internal */ -class CompositeKey implements Writeable { +public class CompositeKey implements Writeable { private final Comparable[] values; CompositeKey(Comparable... values) { @@ -64,11 +64,11 @@ Comparable[] values() { return values; } - int size() { + public int size() { return values.length; } - Comparable get(int pos) { + public Comparable get(int pos) { assert pos < values.length; return values[pos]; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java index 6ee1682a7b196..2c4d451322bca 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueue.java @@ -47,6 +47,8 @@ /** * A specialized {@link PriorityQueue} implementation for composite buckets. + * Can think of this as a max heap that holds the top small buckets slots in order. + * Each slot holds the values of the composite bucket key it represents. * * @opensearch.internal */ @@ -77,7 +79,7 @@ public int hashCode() { private final BigArrays bigArrays; private final int maxSize; - private final Map map; + private final Map map; // to quickly find the slot for a value private final SingleDimensionValuesSource[] arrays; private LongArray docCounts; @@ -108,7 +110,7 @@ public int hashCode() { @Override protected boolean lessThan(Integer a, Integer b) { - return compare(a, b) > 0; + return compare(a, b) > 0; // max heap } /** @@ -119,10 +121,10 @@ boolean isFull() { } /** - * Compares the current candidate with the values in the queue and returns + * Try to get the slot of the current/candidate values in the queue and returns * the slot if the candidate is already in the queue or null if the candidate is not present. */ - Integer compareCurrent() { + Integer getCurrentSlot() { return map.get(new Slot(CANDIDATE_SLOT)); } @@ -281,32 +283,34 @@ boolean addIfCompetitive(long inc) { */ boolean addIfCompetitive(int indexSortSourcePrefix, long inc) { // checks if the candidate key is competitive - Integer topSlot = compareCurrent(); - if (topSlot != null) { + Integer curSlot = getCurrentSlot(); + if (curSlot != null) { // this key is already in the top N, skip it - docCounts.increment(topSlot, inc); + docCounts.increment(curSlot, inc); return true; } + if (afterKeyIsSet) { int cmp = compareCurrentWithAfter(); if (cmp <= 0) { if (indexSortSourcePrefix < 0 && cmp == indexSortSourcePrefix) { - // the leading index sort is in the reverse order of the leading source + // the leading index sort is and the leading source order are both reversed, // so we can early terminate when we reach a document that is smaller // than the after key (collected on a previous page). throw new CollectionTerminatedException(); } - // key was collected on a previous page, skip it (>= afterKey). + // the key was collected on a previous page, skip it. return false; } } + + // the heap is full, check if the candidate key larger than max heap top if (size() >= maxSize) { - // the tree map is full, check if the candidate key should be kept int cmp = compare(CANDIDATE_SLOT, top()); if (cmp > 0) { if (cmp <= indexSortSourcePrefix) { - // index sort guarantees that there is no key greater or equal than the - // current one in the subsequent documents so we can early terminate. + // index sort guarantees the following documents will have a key larger than the current candidate, + // so we can early terminate. throw new CollectionTerminatedException(); } // the candidate key is not competitive, skip it. @@ -324,7 +328,7 @@ boolean addIfCompetitive(int indexSortSourcePrefix, long inc) { } else { newSlot = size(); } - // move the candidate key to its new slot + // move the candidate key to its new slot by copy its values to the new slot copyCurrent(newSlot, inc); map.put(new Slot(newSlot), newSlot); add(newSlot); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesSourceConfig.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesSourceConfig.java index 788a4ddc15374..5289b3a34ab34 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesSourceConfig.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeValuesSourceConfig.java @@ -156,7 +156,7 @@ public MissingOrder missingOrder() { /** * Returns true if the source contains a script that can change the value. */ - protected boolean hasScript() { + public boolean hasScript() { return hasScript; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/DateHistogramValuesSourceBuilder.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/DateHistogramValuesSourceBuilder.java index fd94ba355238a..3926ce9bbecb7 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/DateHistogramValuesSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/DateHistogramValuesSourceBuilder.java @@ -298,7 +298,7 @@ public static void register(ValuesSourceRegistry.Builder builder) { // TODO once composite is plugged in to the values source registry or at least understands Date values source types use it // here Rounding.Prepared preparedRounding = rounding.prepareForUnknown(); - RoundingValuesSource vs = new RoundingValuesSource(numeric, preparedRounding); + RoundingValuesSource vs = new RoundingValuesSource(numeric, preparedRounding, rounding); // is specified in the builder. final DocValueFormat docValueFormat = format == null ? DocValueFormat.RAW : valuesSourceConfig.format(); final MappedFieldType fieldType = valuesSourceConfig.fieldType(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/InternalComposite.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/InternalComposite.java index 9f8a4cff5f3fc..43f1ad32a66f4 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/InternalComposite.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/InternalComposite.java @@ -339,7 +339,7 @@ public static class InternalBucket extends InternalMultiBucketAggregation.Intern KeyComparable { private final CompositeKey key; - private final long docCount; + private long docCount; private final InternalAggregations aggregations; private final transient int[] reverseMuls; private final transient MissingOrder[] missingOrders; @@ -436,6 +436,10 @@ public long getDocCount() { return docCount; } + public void setDocCount(long docCount) { + this.docCount = docCount; + } + @Override public Aggregations getAggregations() { return aggregations; diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/PointsSortedDocsProducer.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/PointsSortedDocsProducer.java index 3d6730203b6ae..dc130eb54c0ea 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/PointsSortedDocsProducer.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/PointsSortedDocsProducer.java @@ -68,6 +68,7 @@ DocIdSet processLeaf(Query query, CompositeValuesCollectorQueue queue, LeafReade // no value for the field return DocIdSet.EMPTY; } + long lowerBucket = Long.MIN_VALUE; Comparable lowerValue = queue.getLowerValueLeadSource(); if (lowerValue != null) { @@ -76,7 +77,6 @@ DocIdSet processLeaf(Query query, CompositeValuesCollectorQueue queue, LeafReade } lowerBucket = (Long) lowerValue; } - long upperBucket = Long.MAX_VALUE; Comparable upperValue = queue.getUpperValueLeadSource(); if (upperValue != null) { @@ -85,6 +85,7 @@ DocIdSet processLeaf(Query query, CompositeValuesCollectorQueue queue, LeafReade } upperBucket = (Long) upperValue; } + DocIdSetBuilder builder = fillDocIdSet ? new DocIdSetBuilder(context.reader().maxDoc(), values, field) : null; Visitor visitor = new Visitor(context, queue, builder, values.getBytesPerDimension(), lowerBucket, upperBucket); try { @@ -146,6 +147,7 @@ public void visit(int docID, byte[] packedValue) throws IOException { } long bucket = bucketFunction.applyAsLong(packedValue); + // process previous bucket when new bucket appears if (first == false && bucket != lastBucket) { final DocIdSet docIdSet = bucketDocsBuilder.build(); if (processBucket(queue, context, docIdSet.iterator(), lastBucket, builder) && @@ -182,13 +184,13 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue return PointValues.Relation.CELL_OUTSIDE_QUERY; } } - if (upperBucket != Long.MAX_VALUE) { long minBucket = bucketFunction.applyAsLong(minPackedValue); if (minBucket > upperBucket) { return PointValues.Relation.CELL_OUTSIDE_QUERY; } } + return PointValues.Relation.CELL_CROSSES_QUERY; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/RoundingValuesSource.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/RoundingValuesSource.java index 89315724ff9ed..3f5cf919f1755 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/RoundingValuesSource.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/RoundingValuesSource.java @@ -47,17 +47,19 @@ * * @opensearch.internal */ -class RoundingValuesSource extends ValuesSource.Numeric { +public class RoundingValuesSource extends ValuesSource.Numeric { private final ValuesSource.Numeric vs; - private final Rounding.Prepared rounding; + private final Rounding.Prepared preparedRounding; + private final Rounding rounding; /** - * - * @param vs The original values source - * @param rounding How to round the values + * @param vs The original values source + * @param preparedRounding How to round the values + * @param rounding The rounding strategy */ - RoundingValuesSource(Numeric vs, Rounding.Prepared rounding) { + RoundingValuesSource(Numeric vs, Rounding.Prepared preparedRounding, Rounding rounding) { this.vs = vs; + this.preparedRounding = preparedRounding; this.rounding = rounding; } @@ -71,8 +73,16 @@ public boolean isBigInteger() { return false; } + public Rounding.Prepared getPreparedRounding() { + return preparedRounding; + } + + public Rounding getRounding() { + return rounding; + } + public long round(long value) { - return rounding.round(value); + return preparedRounding.round(value); } @Override diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index a71c15d551927..0ea820abbedf4 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -33,8 +33,8 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; import org.apache.lucene.util.CollectionUtil; import org.opensearch.common.Rounding; import org.opensearch.common.Rounding.Prepared; @@ -42,7 +42,6 @@ import org.opensearch.common.util.IntArray; import org.opensearch.common.util.LongArray; import org.opensearch.core.common.util.ByteArray; -import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -53,6 +52,7 @@ import org.opensearch.search.aggregations.LeafBucketCollectorBase; import org.opensearch.search.aggregations.bucket.DeferableBucketAggregator; import org.opensearch.search.aggregations.bucket.DeferringBucketCollector; +import org.opensearch.search.aggregations.bucket.FastFilterRewriteHelper; import org.opensearch.search.aggregations.bucket.MergingBucketsDeferringCollector; import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder.RoundingInfo; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; @@ -127,14 +127,14 @@ static AutoDateHistogramAggregator build( * {@link MergingBucketsDeferringCollector#mergeBuckets(long[])}. */ private MergingBucketsDeferringCollector deferringCollector; - private final Weight[] filters; - private final DateFieldMapper.DateFieldType fieldType; protected final RoundingInfo[] roundingInfos; protected final int targetBuckets; protected int roundingIdx; protected Rounding.Prepared preparedRounding; + private final FastFilterRewriteHelper.FastFilterContext fastFilterContext; + private AutoDateHistogramAggregator( String name, AggregatorFactories factories, @@ -156,23 +156,23 @@ private AutoDateHistogramAggregator( this.roundingPreparer = roundingPreparer; this.preparedRounding = prepareRounding(0); - FilterRewriteHelper.FilterContext filterContext = FilterRewriteHelper.buildFastFilterContext( - parent(), - subAggregators.length, - context, - b -> getMinimumRounding(b[0], b[1]), - // Passing prepared rounding as supplier to ensure the correct prepared - // rounding is set as it is done during getMinimumRounding - () -> preparedRounding, - valuesSourceConfig, - fc -> FilterRewriteHelper.getAggregationBounds(context, fc.field()) + fastFilterContext = new FastFilterRewriteHelper.FastFilterContext(); + fastFilterContext.setAggregationType( + new FastFilterRewriteHelper.DateHistogramAggregationType( + valuesSourceConfig.fieldType(), + valuesSourceConfig.missing() != null, + valuesSourceConfig.script() != null + ) ); - if (filterContext != null) { - fieldType = filterContext.fieldType; - filters = filterContext.filters; - } else { - fieldType = null; - filters = null; + if (fastFilterContext.isRewriteable(parent, subAggregators.length)) { + fastFilterContext.buildFastFilter( + context, + fc -> FastFilterRewriteHelper.getAggregationBounds(context, fc.getFieldType().name()), + b -> getMinimumRounding(b[0], b[1]), + // Passing prepared rounding as supplier to ensure the correct prepared + // rounding is set as it is done during getMinimumRounding + () -> preparedRounding + ); } } @@ -226,28 +226,21 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBuc return LeafBucketCollector.NO_OP_COLLECTOR; } + boolean optimized = FastFilterRewriteHelper.tryFastFilterAggregation( + ctx, + fastFilterContext, + (key, count) -> incrementBucketDocCount( + FastFilterRewriteHelper.getBucketOrd(getBucketOrds().add(0, preparedRounding.round(key))), + count + ) + ); + if (optimized) throw new CollectionTerminatedException(); + final SortedNumericDocValues values = valuesSource.longValues(ctx); final LeafBucketCollector iteratingCollector = getLeafCollector(values, sub); - - // Need to be declared as final and array for usage within the - // LeafBucketCollectorBase subclass below - final boolean[] useOpt = new boolean[1]; - useOpt[0] = filters != null; - return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { - // Try fast filter aggregation if the filters have been created - // Skip if tried before and gave incorrect/incomplete results - if (useOpt[0]) { - useOpt[0] = FilterRewriteHelper.tryFastFilterAggregation(ctx, filters, fieldType, (key, count) -> { - incrementBucketDocCount( - FilterRewriteHelper.getBucketOrd(getBucketOrds().add(owningBucketOrd, preparedRounding.round(key))), - count - ); - }); - } - iteratingCollector.collect(doc, owningBucketOrd); } }; diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 8437e1dce9fe0..b95bd093b82a6 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -33,13 +33,12 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; import org.apache.lucene.util.CollectionUtil; import org.opensearch.common.Nullable; import org.opensearch.common.Rounding; import org.opensearch.common.lease.Releasables; -import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -49,8 +48,8 @@ import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; import org.opensearch.search.aggregations.bucket.BucketsAggregator; +import org.opensearch.search.aggregations.bucket.FastFilterRewriteHelper; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; -import org.opensearch.search.aggregations.support.FieldContext; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -81,9 +80,9 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg private final long minDocCount; private final LongBounds extendedBounds; private final LongBounds hardBounds; - private final Weight[] filters; private final LongKeyedBucketOrds bucketOrds; - private final DateFieldMapper.DateFieldType fieldType; + + private final FastFilterRewriteHelper.FastFilterContext fastFilterContext; DateHistogramAggregator( String name, @@ -116,26 +115,21 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality); - FilterRewriteHelper.FilterContext filterContext = FilterRewriteHelper.buildFastFilterContext( - parent, - subAggregators.length, - context, - x -> rounding, - () -> preparedRounding, - valuesSourceConfig, - this::computeBounds + fastFilterContext = new FastFilterRewriteHelper.FastFilterContext(); + fastFilterContext.setAggregationType( + new FastFilterRewriteHelper.DateHistogramAggregationType( + valuesSourceConfig.fieldType(), + valuesSourceConfig.missing() != null, + valuesSourceConfig.script() != null + ) ); - if (filterContext != null) { - fieldType = filterContext.fieldType; - filters = filterContext.filters; - } else { - filters = null; - fieldType = null; + if (fastFilterContext.isRewriteable(parent, subAggregators.length)) { + fastFilterContext.buildFastFilter(context, this::computeBounds, x -> rounding, () -> preparedRounding); } } - private long[] computeBounds(final FieldContext fieldContext) throws IOException { - final long[] bounds = FilterRewriteHelper.getAggregationBounds(context, fieldContext.field()); + private long[] computeBounds(final FastFilterRewriteHelper.DateHistogramAggregationType fieldContext) throws IOException { + final long[] bounds = FastFilterRewriteHelper.getAggregationBounds(context, fieldContext.getFieldType().name()); if (bounds != null) { // Update min/max limit if user specified any hard bounds if (hardBounds != null) { @@ -160,26 +154,20 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol return LeafBucketCollector.NO_OP_COLLECTOR; } - // Need to be declared as final and array for usage within the - // LeafBucketCollectorBase subclass below - final boolean[] useOpt = new boolean[1]; - useOpt[0] = filters != null; + boolean optimized = FastFilterRewriteHelper.tryFastFilterAggregation( + ctx, + fastFilterContext, + (key, count) -> incrementBucketDocCount( + FastFilterRewriteHelper.getBucketOrd(bucketOrds.add(0, preparedRounding.round(key))), + count + ) + ); + if (optimized) throw new CollectionTerminatedException(); SortedNumericDocValues values = valuesSource.longValues(ctx); return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { - // Try fast filter aggregation if the filters have been created - // Skip if tried before and gave incorrect/incomplete results - if (useOpt[0]) { - useOpt[0] = FilterRewriteHelper.tryFastFilterAggregation(ctx, filters, fieldType, (key, count) -> { - incrementBucketDocCount( - FilterRewriteHelper.getBucketOrd(bucketOrds.add(owningBucketOrd, preparedRounding.round(key))), - count - ); - }); - } - if (values.advanceExact(doc)) { int valuesCount = values.docValueCount(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/FilterRewriteHelper.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/FilterRewriteHelper.java deleted file mode 100644 index 29cecd5b382cd..0000000000000 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/FilterRewriteHelper.java +++ /dev/null @@ -1,259 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.aggregations.bucket.histogram; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.search.CollectionTerminatedException; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.PointRangeQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.NumericUtils; -import org.opensearch.common.CheckedFunction; -import org.opensearch.common.Rounding; -import org.opensearch.common.lucene.search.function.FunctionScoreQuery; -import org.opensearch.index.mapper.DateFieldMapper; -import org.opensearch.index.query.DateRangeIncludingNowQuery; -import org.opensearch.search.aggregations.support.FieldContext; -import org.opensearch.search.aggregations.support.ValuesSourceConfig; -import org.opensearch.search.internal.SearchContext; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.OptionalLong; -import java.util.function.BiConsumer; -import java.util.function.Function; -import java.util.function.Supplier; - -/** - * Helpers functions to rewrite and optimize aggregations using - * range filter queries - * - * @opensearch.internal - */ -public class FilterRewriteHelper { - - static class FilterContext { - final DateFieldMapper.DateFieldType fieldType; - final Weight[] filters; - - public FilterContext(DateFieldMapper.DateFieldType fieldType, Weight[] filters) { - this.fieldType = fieldType; - this.filters = filters; - } - } - - private static final int MAX_NUM_FILTER_BUCKETS = 1024; - private static final Map, Function> queryWrappers; - - // Initialize the wrappers map for unwrapping the query - static { - queryWrappers = new HashMap<>(); - queryWrappers.put(ConstantScoreQuery.class, q -> ((ConstantScoreQuery) q).getQuery()); - queryWrappers.put(FunctionScoreQuery.class, q -> ((FunctionScoreQuery) q).getSubQuery()); - queryWrappers.put(DateRangeIncludingNowQuery.class, q -> ((DateRangeIncludingNowQuery) q).getQuery()); - queryWrappers.put(IndexOrDocValuesQuery.class, q -> ((IndexOrDocValuesQuery) q).getIndexQuery()); - } - - /** - * Recursively unwraps query into the concrete form - * for applying the optimization - */ - private static Query unwrapIntoConcreteQuery(Query query) { - while (queryWrappers.containsKey(query.getClass())) { - query = queryWrappers.get(query.getClass()).apply(query); - } - - return query; - } - - /** - * Finds the min and max bounds for segments within the passed search context - */ - private static long[] getIndexBoundsFromLeaves(final SearchContext context, final String fieldName) throws IOException { - final List leaves = context.searcher().getIndexReader().leaves(); - long min = Long.MAX_VALUE, max = Long.MIN_VALUE; - // Since the query does not specify bounds for aggregation, we can - // build the global min/max from local min/max within each segment - for (LeafReaderContext leaf : leaves) { - final PointValues values = leaf.reader().getPointValues(fieldName); - if (values != null) { - min = Math.min(min, NumericUtils.sortableBytesToLong(values.getMinPackedValue(), 0)); - max = Math.max(max, NumericUtils.sortableBytesToLong(values.getMaxPackedValue(), 0)); - } - } - - if (min == Long.MAX_VALUE || max == Long.MIN_VALUE) return null; - - return new long[] { min, max }; - } - - static long[] getAggregationBounds(final SearchContext context, final String fieldName) throws IOException { - final Query cq = unwrapIntoConcreteQuery(context.query()); - final long[] indexBounds = getIndexBoundsFromLeaves(context, fieldName); - if (cq instanceof PointRangeQuery) { - final PointRangeQuery prq = (PointRangeQuery) cq; - // Ensure that the query and aggregation are on the same field - if (prq.getField().equals(fieldName)) { - return new long[] { - // Minimum bound for aggregation is the max between query and global - Math.max(NumericUtils.sortableBytesToLong(prq.getLowerPoint(), 0), indexBounds[0]), - // Maximum bound for aggregation is the min between query and global - Math.min(NumericUtils.sortableBytesToLong(prq.getUpperPoint(), 0), indexBounds[1]) }; - } - } else if (cq instanceof MatchAllDocsQuery) { - return indexBounds; - } - - return null; - } - - /** - * Creates the range query filters for aggregations using the interval, min/max - * bounds and the rounding values - */ - private static Weight[] createFilterForAggregations( - final SearchContext context, - final Rounding rounding, - final Rounding.Prepared preparedRounding, - final String field, - final DateFieldMapper.DateFieldType fieldType, - final long low, - final long high - ) throws IOException { - final OptionalLong intervalOpt = Rounding.getInterval(rounding); - if (intervalOpt.isEmpty()) { - return null; - } - - final long interval = intervalOpt.getAsLong(); - // Calculate the number of buckets using range and interval - long roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); - long prevRounded = roundedLow; - int bucketCount = 0; - while (roundedLow <= fieldType.convertNanosToMillis(high)) { - bucketCount++; - // Below rounding is needed as the interval could return in - // non-rounded values for something like calendar month - roundedLow = preparedRounding.round(roundedLow + interval); - if (prevRounded == roundedLow) break; - prevRounded = roundedLow; - } - - Weight[] filters = null; - if (bucketCount > 0 && bucketCount <= MAX_NUM_FILTER_BUCKETS) { - int i = 0; - filters = new Weight[bucketCount]; - roundedLow = preparedRounding.round(fieldType.convertNanosToMillis(low)); - while (i < bucketCount) { - // Calculate the lower bucket bound - final byte[] lower = new byte[8]; - NumericUtils.longToSortableBytes(i == 0 ? low : fieldType.convertRoundedMillisToNanos(roundedLow), lower, 0); - // Calculate the upper bucket bound - final byte[] upper = new byte[8]; - roundedLow = preparedRounding.round(roundedLow + interval); - // Subtract -1 if the minimum is roundedLow as roundedLow itself - // is included in the next bucket - NumericUtils.longToSortableBytes( - i + 1 == bucketCount ? high : fieldType.convertRoundedMillisToNanos(roundedLow) - 1, - upper, - 0 - ); - filters[i++] = context.searcher().createWeight(new PointRangeQuery(field, lower, upper, 1) { - @Override - protected String toString(int dimension, byte[] value) { - return null; - } - }, ScoreMode.COMPLETE_NO_SCORES, 1); - } - } - - return filters; - } - - static FilterContext buildFastFilterContext( - final Object parent, - final int subAggLength, - SearchContext context, - Function roundingFunction, - Supplier preparedRoundingSupplier, - ValuesSourceConfig valuesSourceConfig, - CheckedFunction computeBounds - ) throws IOException { - // Create the filters for fast aggregation only if the query is instance - // of point range query and there aren't any parent/sub aggregations - if (parent == null && subAggLength == 0 && valuesSourceConfig.missing() == null && valuesSourceConfig.script() == null) { - final FieldContext fieldContext = valuesSourceConfig.fieldContext(); - if (fieldContext != null) { - final String fieldName = fieldContext.field(); - final long[] bounds = computeBounds.apply(fieldContext); - if (bounds != null) { - assert fieldContext.fieldType() instanceof DateFieldMapper.DateFieldType; - final DateFieldMapper.DateFieldType fieldType = (DateFieldMapper.DateFieldType) fieldContext.fieldType(); - final Rounding rounding = roundingFunction.apply(bounds); - final Weight[] filters = FilterRewriteHelper.createFilterForAggregations( - context, - rounding, - preparedRoundingSupplier.get(), - fieldName, - fieldType, - bounds[0], - bounds[1] - ); - return new FilterContext(fieldType, filters); - } - } - } - return null; - } - - static long getBucketOrd(long bucketOrd) { - if (bucketOrd < 0) { // already seen - bucketOrd = -1 - bucketOrd; - } - - return bucketOrd; - } - - static boolean tryFastFilterAggregation( - final LeafReaderContext ctx, - final Weight[] filters, - final DateFieldMapper.DateFieldType fieldType, - final BiConsumer incrementDocCount - ) throws IOException { - final int[] counts = new int[filters.length]; - int i; - for (i = 0; i < filters.length; i++) { - counts[i] = filters[i].count(ctx); - if (counts[i] == -1) { - // Cannot use the optimization if any of the counts - // is -1 indicating the segment might have deleted documents - return false; - } - } - - for (i = 0; i < filters.length; i++) { - if (counts[i] > 0) { - incrementDocCount.accept( - fieldType.convertNanosToMillis( - NumericUtils.sortableBytesToLong(((PointRangeQuery) filters[i].getQuery()).getLowerPoint(), 0) - ), - counts[i] - ); - } - } - throw new CollectionTerminatedException(); - } -} diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java index eabc4b7764eed..b581e552fec4f 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java @@ -1279,7 +1279,7 @@ public void testWithDateHistogram() throws IOException { }, (result) -> { assertEquals(3, result.getBuckets().size()); - assertEquals("{date=1508457600000}", result.afterKey().toString()); + assertEquals("{date=1508457600000}", result.afterKey().toString()); // 2017-10-20T00:00:00 assertEquals("{date=1474329600000}", result.getBuckets().get(0).getKeyAsString()); assertEquals(2L, result.getBuckets().get(0).getDocCount()); assertEquals("{date=1508371200000}", result.getBuckets().get(1).getKeyAsString()); @@ -1300,9 +1300,8 @@ public void testWithDateHistogram() throws IOException { DateHistogramValuesSourceBuilder histo = new DateHistogramValuesSourceBuilder("date").field("date") .calendarInterval(DateHistogramInterval.days(1)); return new CompositeAggregationBuilder("name", Collections.singletonList(histo)).aggregateAfter( - createAfterKey("date", 1474329600000L) + createAfterKey("date", 1474329600000L) // 2016-09-20T00:00:00 ); - }, (result) -> { assertEquals(2, result.getBuckets().size()); @@ -2242,21 +2241,20 @@ private , V extends Comparable> void testRandomTerms( Function transformKey ) throws IOException { int numTerms = randomIntBetween(10, 500); - List terms = new ArrayList<>(); + List terms = new ArrayList<>(); // possible values for the terms for (int i = 0; i < numTerms; i++) { terms.add(randomSupplier.get()); } int numDocs = randomIntBetween(100, 200); List>> dataset = new ArrayList<>(); - - Set valuesSet = new HashSet<>(); - Map, AtomicLong> expectedDocCounts = new HashMap<>(); + Set valuesSet = new HashSet<>(); // how many different values + Map, AtomicLong> expectedDocCounts = new HashMap<>(); // how many docs for each value for (int i = 0; i < numDocs; i++) { int numValues = randomIntBetween(1, 5); Set values = new HashSet<>(); for (int j = 0; j < numValues; j++) { int rand = randomIntBetween(0, terms.size() - 1); - if (values.add(terms.get(rand))) { + if (values.add(terms.get(rand))) { // values are unique for one doc AtomicLong count = expectedDocCounts.computeIfAbsent(terms.get(rand), (k) -> new AtomicLong(0)); count.incrementAndGet(); valuesSet.add(terms.get(rand)); @@ -2264,9 +2262,8 @@ private , V extends Comparable> void testRandomTerms( } dataset.add(Collections.singletonMap(field, new ArrayList<>(values))); } - List expected = new ArrayList<>(valuesSet); + List expected = new ArrayList<>(valuesSet); // how many buckets expected Collections.sort(expected); - List> seen = new ArrayList<>(); AtomicBoolean finish = new AtomicBoolean(false); int size = randomIntBetween(1, expected.size());