From 7769ce5a9310e56169b7310cca71f1994ee4f8cc Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Tue, 6 Aug 2024 11:30:41 -0400 Subject: [PATCH 01/10] Bump org.bouncycastle:bcpg-fips from 1.0.7.1 to 2.0.8 and org.bouncycastle:bc-fips from 1.0.2.5 to 2.0.0 (#15122) * Bump org.bouncycastle:bcpg-fips from 1.0.7.1 to 2.0.8 and org.bouncycastle:bc-fips from 1.0.2.5 to 2.0.0 in /distribution/tools/plugin-cli Signed-off-by: Craig Perkins * Add to CHANGELOG Signed-off-by: Craig Perkins --------- Signed-off-by: Craig Perkins --- CHANGELOG.md | 1 + distribution/tools/plugin-cli/build.gradle | 31 ++----------------- .../licenses/bc-fips-1.0.2.5.jar.sha1 | 1 - .../licenses/bc-fips-2.0.0.jar.sha1 | 1 + .../licenses/bcpg-fips-1.0.7.1.jar.sha1 | 1 - .../licenses/bcpg-fips-2.0.8.jar.sha1 | 1 + 6 files changed, 5 insertions(+), 31 deletions(-) delete mode 100644 distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 create mode 100644 distribution/tools/plugin-cli/licenses/bc-fips-2.0.0.jar.sha1 delete mode 100644 distribution/tools/plugin-cli/licenses/bcpg-fips-1.0.7.1.jar.sha1 create mode 100644 distribution/tools/plugin-cli/licenses/bcpg-fips-2.0.8.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c7d7aa9bf780..061e3280852e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.tukaani:xz` from 1.9 to 1.10 ([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) - Bump `actions/setup-java` from 1 to 4 ([#15104](https://github.com/opensearch-project/OpenSearch/pull/15104)) - Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) +- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.8 and `org.bouncycastle:bc-fips` from 1.0.2.5 to 2.0.0 in /distribution/tools/plugin-cli ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103)) ### Changed - Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) diff --git a/distribution/tools/plugin-cli/build.gradle b/distribution/tools/plugin-cli/build.gradle index 3083ad4375460..a619ba1acf6a7 100644 --- a/distribution/tools/plugin-cli/build.gradle +++ b/distribution/tools/plugin-cli/build.gradle @@ -37,8 +37,8 @@ base { dependencies { compileOnly project(":server") compileOnly project(":libs:opensearch-cli") - api "org.bouncycastle:bcpg-fips:1.0.7.1" - api "org.bouncycastle:bc-fips:1.0.2.5" + api "org.bouncycastle:bcpg-fips:2.0.8" + api "org.bouncycastle:bc-fips:2.0.0" testImplementation project(":test:framework") testImplementation 'com.google.jimfs:jimfs:1.3.0' testRuntimeOnly("com.google.guava:guava:${versions.guava}") { @@ -58,33 +58,6 @@ test { jvmArgs += [ "-Djava.security.egd=file:/dev/urandom" ] } -/* - * these two classes intentionally use the following JDK internal APIs in order to offer the necessary - * functionality - * - * sun.security.internal.spec.TlsKeyMaterialParameterSpec - * sun.security.internal.spec.TlsKeyMaterialSpec - * sun.security.internal.spec.TlsMasterSecretParameterSpec - * sun.security.internal.spec.TlsPrfParameterSpec - * sun.security.internal.spec.TlsRsaPremasterSecretParameterSpec - * sun.security.provider.SecureRandom - * - */ -thirdPartyAudit.ignoreViolations( - 'org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider$CoreSecureRandom', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF', - 
'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$BaseTLSKeyGeneratorSpi', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSKeyMaterialGenerator', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSKeyMaterialGenerator$2', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSMasterSecretGenerator', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSMasterSecretGenerator$2', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSPRFKeyGenerator', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSRsaPreMasterSecretGenerator', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSRsaPreMasterSecretGenerator$2', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSExtendedMasterSecretGenerator', - 'org.bouncycastle.jcajce.provider.ProvSunTLSKDF$TLSExtendedMasterSecretGenerator$2' -) - thirdPartyAudit.ignoreMissingClasses( 'org.brotli.dec.BrotliInputStream', 'org.objectweb.asm.AnnotationVisitor', diff --git a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 b/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 deleted file mode 100644 index 1b44c77dd4ee1..0000000000000 --- a/distribution/tools/plugin-cli/licenses/bc-fips-1.0.2.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -704e65f7e4fe679e5ab2aa8a840f27f8ced4c522 \ No newline at end of file diff --git a/distribution/tools/plugin-cli/licenses/bc-fips-2.0.0.jar.sha1 b/distribution/tools/plugin-cli/licenses/bc-fips-2.0.0.jar.sha1 new file mode 100644 index 0000000000000..79f0e3e9930bb --- /dev/null +++ b/distribution/tools/plugin-cli/licenses/bc-fips-2.0.0.jar.sha1 @@ -0,0 +1 @@ +ee9ac432cf08f9a9ebee35d7cf8a45f94959a7ab \ No newline at end of file diff --git a/distribution/tools/plugin-cli/licenses/bcpg-fips-1.0.7.1.jar.sha1 b/distribution/tools/plugin-cli/licenses/bcpg-fips-1.0.7.1.jar.sha1 deleted file mode 100644 index 44cebc7c92d87..0000000000000 --- a/distribution/tools/plugin-cli/licenses/bcpg-fips-1.0.7.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5e1952428655ea822066f86df2e3ecda8fa0ba2b \ No newline at end of file diff --git a/distribution/tools/plugin-cli/licenses/bcpg-fips-2.0.8.jar.sha1 b/distribution/tools/plugin-cli/licenses/bcpg-fips-2.0.8.jar.sha1 new file mode 100644 index 0000000000000..758ee2fdf9de6 --- /dev/null +++ b/distribution/tools/plugin-cli/licenses/bcpg-fips-2.0.8.jar.sha1 @@ -0,0 +1 @@ +51c2f633e0c32d10de1ebab4c86f93310ff820f8 \ No newline at end of file From 76b9931299b4a45308b7ede4659e750eacd5006a Mon Sep 17 00:00:00 2001 From: David Zane <38449481+dzane17@users.noreply.github.com> Date: Tue, 6 Aug 2024 09:33:34 -0700 Subject: [PATCH 02/10] Add took time to request nodes stats (#15054) Signed-off-by: David Zane --- CHANGELOG-3.0.md | 1 + .../action/search/SearchRequestStats.java | 31 ++++++++++++++++ .../index/search/stats/SearchStats.java | 24 ++++++++++++- .../search/SearchRequestStatsTests.java | 35 +++++++++++++++++++ 4 files changed, 90 insertions(+), 1 deletion(-) diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md index 48d978bede420..78e93eed0158a 100644 --- a/CHANGELOG-3.0.md +++ b/CHANGELOG-3.0.md @@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800)) - Allow to pass the list settings through environment variables (like [], ["a", "b", "c"], ...) 
([#10625](https://github.com/opensearch-project/OpenSearch/pull/10625)) - Views, simplify data access and manipulation by providing a virtual layer over one or more indices ([#11957](https://github.com/opensearch-project/OpenSearch/pull/11957)) +- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) ### Dependencies diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java b/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java index 97ef94055faf7..d1d5f568fc09d 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestStats.java @@ -27,6 +27,7 @@ @PublicApi(since = "2.11.0") public final class SearchRequestStats extends SearchRequestOperationsListener { Map phaseStatsMap = new EnumMap<>(SearchPhaseName.class); + StatsHolder tookStatsHolder; public static final String SEARCH_REQUEST_STATS_ENABLED_KEY = "search.request_stats_enabled"; public static final Setting SEARCH_REQUEST_STATS_ENABLED = Setting.boolSetting( @@ -40,6 +41,7 @@ public final class SearchRequestStats extends SearchRequestOperationsListener { public SearchRequestStats(ClusterSettings clusterSettings) { this.setEnabled(clusterSettings.get(SEARCH_REQUEST_STATS_ENABLED)); clusterSettings.addSettingsUpdateConsumer(SEARCH_REQUEST_STATS_ENABLED, this::setEnabled); + tookStatsHolder = new StatsHolder(); for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { phaseStatsMap.put(searchPhaseName, new StatsHolder()); } @@ -57,6 +59,18 @@ public long getPhaseMetric(SearchPhaseName searchPhaseName) { return phaseStatsMap.get(searchPhaseName).timing.sum(); } + public long getTookCurrent() { + return tookStatsHolder.current.count(); + } + + public long getTookTotal() { + return tookStatsHolder.total.count(); + } + + public long getTookMetric() { + return tookStatsHolder.timing.sum(); + } + @Override protected void onPhaseStart(SearchPhaseContext context) { phaseStatsMap.get(context.getCurrentPhase().getSearchPhaseName()).current.inc(); @@ -75,6 +89,23 @@ protected void onPhaseFailure(SearchPhaseContext context, Throwable cause) { phaseStatsMap.get(context.getCurrentPhase().getSearchPhaseName()).current.dec(); } + @Override + protected void onRequestStart(SearchRequestContext searchRequestContext) { + tookStatsHolder.current.inc(); + } + + @Override + protected void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + tookStatsHolder.current.dec(); + tookStatsHolder.total.inc(); + tookStatsHolder.timing.inc(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - searchRequestContext.getAbsoluteStartNanos())); + } + + @Override + protected void onRequestFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + tookStatsHolder.current.dec(); + } + /** * Holder of statistics values * diff --git a/server/src/main/java/org/opensearch/index/search/stats/SearchStats.java b/server/src/main/java/org/opensearch/index/search/stats/SearchStats.java index bb61e1afa05f4..d6ea803c9ee13 100644 --- a/server/src/main/java/org/opensearch/index/search/stats/SearchStats.java +++ b/server/src/main/java/org/opensearch/index/search/stats/SearchStats.java @@ -110,7 +110,7 @@ public void writeTo(StreamOutput out) throws IOException { } /** - * Holds requests stats for different phases. + * Holds all requests stats. 
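A minimal illustration of the took-time accounting this patch adds (plain Java, not lines from the patch): SearchRequestStats records an absolute start time in nanoseconds and, on request end, folds the elapsed delta into millisecond counters. The class below is a hypothetical stand-in for StatsHolder's current/total/timing metrics; in the patch itself these counters surface through getTookCurrent/getTookTotal/getTookMetric and the new Fields.TOOK section of SearchStats#toXContent.

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;

class TookTimeSketch {
    private final LongAdder current = new LongAdder(); // in-flight requests
    private final LongAdder total = new LongAdder();   // completed requests
    private final LongAdder timing = new LongAdder();  // cumulative took time in millis

    long onStart() {
        current.increment();      // mirrors onRequestStart
        return System.nanoTime(); // analogous to SearchRequestContext#getAbsoluteStartNanos
    }

    void onEnd(long absoluteStartNanos) {
        current.decrement(); // the request is no longer in flight
        total.increment();   // one more completed request
        // mirrors onRequestEnd: convert the elapsed nanos to millis and accumulate
        timing.add(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - absoluteStartNanos));
    }
}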
* * @opensearch.api */ @@ -124,6 +124,7 @@ public Map getRequestStatsHolder() { } RequestStatsLongHolder() { + requestStatsHolder.put(Fields.TOOK, new PhaseStatsLongHolder()); for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { requestStatsHolder.put(searchPhaseName.getName(), new PhaseStatsLongHolder()); } @@ -512,6 +513,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (requestStatsLongHolder != null) { builder.startObject(Fields.REQUEST); + PhaseStatsLongHolder tookStatsLongHolder = requestStatsLongHolder.requestStatsHolder.get(Fields.TOOK); + if (tookStatsLongHolder != null) { + builder.startObject(Fields.TOOK); + builder.humanReadableField(Fields.TIME_IN_MILLIS, Fields.TIME, new TimeValue(tookStatsLongHolder.timeInMillis)); + builder.field(Fields.CURRENT, tookStatsLongHolder.current); + builder.field(Fields.TOTAL, tookStatsLongHolder.total); + builder.endObject(); + } + for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { PhaseStatsLongHolder statsLongHolder = requestStatsLongHolder.requestStatsHolder.get(searchPhaseName.getName()); if (statsLongHolder == null) { @@ -545,6 +555,17 @@ public void setSearchRequestStats(SearchRequestStats searchRequestStats) { totalStats.requestStatsLongHolder = new RequestStatsLongHolder(); } + // Set took stats + totalStats.requestStatsLongHolder.requestStatsHolder.put( + Fields.TOOK, + new PhaseStatsLongHolder( + searchRequestStats.getTookCurrent(), + searchRequestStats.getTookTotal(), + searchRequestStats.getTookMetric() + ) + ); + + // Set phase stats for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { totalStats.requestStatsLongHolder.requestStatsHolder.put( searchPhaseName.getName(), @@ -678,6 +699,7 @@ static final class Fields { static final String CURRENT = "current"; static final String TOTAL = "total"; static final String SEARCH_IDLE_REACTIVATE_COUNT_TOTAL = "search_idle_reactivate_count_total"; + static final String TOOK = "took"; } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java index 1af3eb2738a58..3bad3ec3e7d21 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java @@ -25,6 +25,41 @@ import static org.mockito.Mockito.when; public class SearchRequestStatsTests extends OpenSearchTestCase { + public void testSearchRequestStats_OnRequestFailure() { + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + SearchRequestStats testRequestStats = new SearchRequestStats(clusterSettings); + SearchPhaseContext mockSearchPhaseContext = mock(SearchPhaseContext.class); + SearchRequestContext mockSearchRequestContext = mock(SearchRequestContext.class); + + testRequestStats.onRequestStart(mockSearchRequestContext); + assertEquals(1, testRequestStats.getTookCurrent()); + testRequestStats.onRequestFailure(mockSearchPhaseContext, mockSearchRequestContext); + assertEquals(0, testRequestStats.getTookCurrent()); + assertEquals(0, testRequestStats.getTookTotal()); + } + + public void testSearchRequestStats_OnRequestEnd() { + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + SearchRequestStats testRequestStats = new SearchRequestStats(clusterSettings); + SearchPhaseContext mockSearchPhaseContext = 
mock(SearchPhaseContext.class); + SearchRequestContext mockSearchRequestContext = mock(SearchRequestContext.class); + + // Start request + testRequestStats.onRequestStart(mockSearchRequestContext); + assertEquals(1, testRequestStats.getTookCurrent()); + + // Mock start time + long tookTimeInMillis = randomIntBetween(1, 10); + long startTimeInNanos = System.nanoTime() - TimeUnit.MILLISECONDS.toNanos(tookTimeInMillis); + when(mockSearchRequestContext.getAbsoluteStartNanos()).thenReturn(startTimeInNanos); + + // End request + testRequestStats.onRequestEnd(mockSearchPhaseContext, mockSearchRequestContext); + assertEquals(0, testRequestStats.getTookCurrent()); + assertEquals(1, testRequestStats.getTookTotal()); + assertThat(testRequestStats.getTookMetric(), greaterThanOrEqualTo(tookTimeInMillis)); + } + public void testSearchRequestPhaseFailure() { ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); SearchRequestStats testRequestStats = new SearchRequestStats(clusterSettings); From c7254315b7d801951547b5f4bd3f2c89ac484c71 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Wed, 7 Aug 2024 01:17:01 +0800 Subject: [PATCH 03/10] Fix version check for adding rangeQuery and regexpQuery support for constant_keyword field type (#15127) Signed-off-by: Gao Binlong --- .../{110_constant_keyword.yml => 115_constant_keyword.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename rest-api-spec/src/main/resources/rest-api-spec/test/index/{110_constant_keyword.yml => 115_constant_keyword.yml} (98%) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/115_constant_keyword.yml similarity index 98% rename from rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml rename to rest-api-spec/src/main/resources/rest-api-spec/test/index/115_constant_keyword.yml index 1c50187534026..e60981dbbf50c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/115_constant_keyword.yml @@ -70,8 +70,8 @@ --- "Queries": - skip: - version: " - 2.99.99" - reason: "rangeQuery and regexpQuery are supported in 3.0.0 in main branch" + version: " - 2.16.99" + reason: "rangeQuery and regexpQuery are introduced in 2.17.0" - do: indices.create: From 2829a89f1484cc92e74e56a2695f691e375948c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 15:16:59 -0400 Subject: [PATCH 04/10] Bump com.azure:azure-core from 1.49.1 to 1.51.0 in /plugins/repository-azure (#15111) * Bump com.azure:azure-core in /plugins/repository-azure Bumps [com.azure:azure-core](https://github.com/Azure/azure-sdk-for-java) from 1.49.1 to 1.51.0. - [Release notes](https://github.com/Azure/azure-sdk-for-java/releases) - [Commits](https://github.com/Azure/azure-sdk-for-java/compare/azure-core_1.49.1...azure-core_1.51.0) --- updated-dependencies: - dependency-name: com.azure:azure-core dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/repository-azure/build.gradle | 2 +- plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 | 1 - plugins/repository-azure/licenses/azure-core-1.51.0.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 create mode 100644 plugins/repository-azure/licenses/azure-core-1.51.0.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 061e3280852e8..5d1650c8341a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `actions/setup-java` from 1 to 4 ([#15104](https://github.com/opensearch-project/OpenSearch/pull/15104)) - Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) - Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.8 and `org.bouncycastle:bc-fips` from 1.0.2.5 to 2.0.0 in /distribution/tools/plugin-cli ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103)) +- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) ### Changed - Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle index 15e3158f2dbc4..80809e067f65a 100644 --- a/plugins/repository-azure/build.gradle +++ b/plugins/repository-azure/build.gradle @@ -44,7 +44,7 @@ opensearchplugin { } dependencies { - api 'com.azure:azure-core:1.49.1' + api 'com.azure:azure-core:1.51.0' api 'com.azure:azure-json:1.1.0' api 'com.azure:azure-xml:1.0.0' api 'com.azure:azure-storage-common:12.25.1' diff --git a/plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 b/plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 deleted file mode 100644 index d487c08c26e94..0000000000000 --- a/plugins/repository-azure/licenses/azure-core-1.49.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a7c44282eaa0f5a3be4b920d6a057509adfe8674 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-core-1.51.0.jar.sha1 b/plugins/repository-azure/licenses/azure-core-1.51.0.jar.sha1 new file mode 100644 index 0000000000000..7200f59af2f9a --- /dev/null +++ b/plugins/repository-azure/licenses/azure-core-1.51.0.jar.sha1 @@ -0,0 +1 @@ +ff5d0aedf75ca45ec0ace24673f790d2f7a57096 \ No newline at end of file From f980924136e4d689581c2346d3def8580c178087 Mon Sep 17 00:00:00 2001 From: Rishabh Singh Date: Tue, 6 Aug 2024 13:29:15 -0700 Subject: [PATCH 05/10] Add baseline-cluster-config key to benchmark config (#15134) Signed-off-by: Rishabh Singh --- .github/benchmark-configs.json | 34 +++++++++++++------- .github/workflows/benchmark-pull-request.yml | 2 ++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.github/benchmark-configs.json b/.github/benchmark-configs.json index 5b44198cd3b8e..8f4bad040fe44 100644 --- a/.github/benchmark-configs.json +++ b/.github/benchmark-configs.json @@ -14,7 +14,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G 
Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-single-node-1-shard-0-replica-baseline" }, "id_2": { "description": "Indexing only configuration for HTTP_LOGS workload", @@ -30,7 +31,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-single-node-1-shard-0-replica-baseline" }, "id_3": { "description": "Search only test-procedure for NYC_TAXIS, uses snapshot to restore the data for OS-3.0.0", @@ -46,7 +48,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" }, "id_4": { "description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-3.0.0", @@ -62,10 +65,11 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" }, "id_5": { - "description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-3.0.0", + "description": "Search only test-procedure for big5, uses snapshot to restore the data for OS-3.0.0", "supported_major_versions": ["3"], "cluster-benchmark-configs": { "SINGLE_NODE_CLUSTER": "true", @@ -78,7 +82,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" }, "id_6": { "description": "Search only test-procedure for NYC_TAXIS, uses snapshot to restore the data for OS-2.x", @@ -94,7 +99,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" }, "id_7": { "description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-2.x", @@ -110,10 +116,11 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" }, "id_8": { - "description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-2.x", + "description": "Search only test-procedure for big5, uses snapshot to restore the data for OS-2.x", "supported_major_versions": ["2"], "cluster-benchmark-configs": { "SINGLE_NODE_CLUSTER": "true", @@ -126,7 +133,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" }, "id_9": { "description": "Indexing and search configuration for pmc workload", @@ -141,7 +149,8 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-single-node-1-shard-0-replica-baseline" }, "id_10": { "description": "Indexing only configuration for stack-overflow workload", @@ -156,6 +165,7 @@ "cluster_configuration": { "size": "Single-Node", "data_instance_config": "4vCPU, 32G Mem, 16G Heap" - } + }, + "baseline_cluster_config": "x64-r5.xlarge-single-node-1-shard-0-replica-baseline" } } diff --git a/.github/workflows/benchmark-pull-request.yml b/.github/workflows/benchmark-pull-request.yml index 2a54c2072de59..1096014e4a291 100644 --- 
a/.github/workflows/benchmark-pull-request.yml +++ b/.github/workflows/benchmark-pull-request.yml @@ -60,6 +60,8 @@ jobs: for (const [key, value] of Object.entries(clusterBenchmarkConfigs)) { core.exportVariable(key, value); } + if (benchmarkConfigs[configId].hasOwnProperty('baseline_cluster_config')) { + core.exportVariable('BASELINE_CLUSTER_CONFIG', benchmarkConfigs[configId]['baseline_cluster_config']); - name: Post invalid format comment if: steps.check_comment.outputs.invalid == 'true' uses: actions/github-script@v7 From b47b401b5a3c15304fc07b2bad9621c9dea122da Mon Sep 17 00:00:00 2001 From: Liyun Xiu Date: Wed, 7 Aug 2024 05:31:12 +0800 Subject: [PATCH 06/10] Fix bulk ingest NPE with empty pipeline (#15033) Signed-off-by: Liyun Xiu --- CHANGELOG.md | 1 + .../org/opensearch/ingest/IngestService.java | 10 +++++- .../opensearch/ingest/IngestServiceTests.java | 36 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d1650c8341a7..be5e5598b09c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) +- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) - Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) - Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) diff --git a/server/src/main/java/org/opensearch/ingest/IngestService.java b/server/src/main/java/org/opensearch/ingest/IngestService.java index 17eb23422e68b..938ca7493926e 100644 --- a/server/src/main/java/org/opensearch/ingest/IngestService.java +++ b/server/src/main/java/org/opensearch/ingest/IngestService.java @@ -997,7 +997,7 @@ private void innerBatchExecute( Consumer> handler ) { if (pipeline.getProcessors().isEmpty()) { - handler.accept(null); + handler.accept(toIngestDocumentWrappers(slots, indexRequests)); return; } @@ -1271,6 +1271,14 @@ private static IngestDocumentWrapper toIngestDocumentWrapper(int slot, IndexRequ return new IngestDocumentWrapper(slot, toIngestDocument(indexRequest), null); } + private static List toIngestDocumentWrappers(List slots, List indexRequests) { + List ingestDocumentWrappers = new ArrayList<>(); + for (int i = 0; i < slots.size(); ++i) { + ingestDocumentWrappers.add(toIngestDocumentWrapper(slots.get(i), indexRequests.get(i))); + } + return ingestDocumentWrappers; + } + private static Map createSlotIndexRequestMap(List slots, List indexRequests) { Map slotIndexRequestMap = new HashMap<>(); for (int i = 0; i < slots.size(); ++i) { diff --git a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java index 166b94966196c..1f4b1d635d438 100644 --- a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java +++ b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java @@ -1995,6 +1995,42 @@ public void testExecuteBulkRequestInBatchWithDefaultBatchSize() { verify(mockCompoundProcessor, never()).execute(any(), any()); } + public void testExecuteEmptyPipelineInBatch() throws Exception { + IngestService ingestService = 
createWithProcessors(emptyMap()); + PutPipelineRequest putRequest = new PutPipelineRequest( + "_id", + new BytesArray("{\"processors\": [], \"description\": \"_description\"}"), + MediaTypeRegistry.JSON + ); + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); // Start empty + ClusterState previousClusterState = clusterState; + clusterState = IngestService.innerPut(putRequest, clusterState); + ingestService.applyClusterState(new ClusterChangedEvent("", clusterState, previousClusterState)); + BulkRequest bulkRequest = new BulkRequest(); + IndexRequest indexRequest1 = new IndexRequest("_index").id("_id1").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest1); + IndexRequest indexRequest2 = new IndexRequest("_index").id("_id2").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest2); + IndexRequest indexRequest3 = new IndexRequest("_index").id("_id3").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest3); + IndexRequest indexRequest4 = new IndexRequest("_index").id("_id4").source(emptyMap()).setPipeline("_id").setFinalPipeline("_none"); + bulkRequest.add(indexRequest4); + bulkRequest.batchSize(4); + final Map failureHandler = new HashMap<>(); + final Map completionHandler = new HashMap<>(); + ingestService.executeBulkRequest( + 4, + bulkRequest.requests(), + failureHandler::put, + completionHandler::put, + indexReq -> {}, + Names.WRITE, + bulkRequest + ); + assertTrue(failureHandler.isEmpty()); + assertEquals(Set.of(Thread.currentThread()), completionHandler.keySet()); + } + public void testPrepareBatches_same_index_pipeline() { IngestService.IndexRequestWrapper wrapper1 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); IngestService.IndexRequestWrapper wrapper2 = createIndexRequestWrapper("index1", Collections.singletonList("p1")); From 212597e41717d1474b143721a29fd6b6e9f8f2fd Mon Sep 17 00:00:00 2001 From: Jay Deng Date: Tue, 6 Aug 2024 15:26:12 -0700 Subject: [PATCH 07/10] CODEOWNERS personalizations for jed326 (#15137) Signed-off-by: Jay Deng --- .github/CODEOWNERS | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1aefeee710f47..fb7d73f599670 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -13,15 +13,25 @@ # Default ownership for all repo files * @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah +/modules/lang-painless/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah +/modules/parent-join/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah /modules/transport-netty4/ @peternied /plugins/identity-shiro/ @peternied +/server/src/internalClusterTest/java/org/opensearch/index/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah 
+/server/src/internalClusterTest/java/org/opensearch/search/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah + /server/src/main/java/org/opensearch/extensions/ @peternied /server/src/main/java/org/opensearch/identity/ @peternied -/server/src/main/java/org/opensearch/threadpool/ @peternied +/server/src/main/java/org/opensearch/index/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah +/server/src/main/java/org/opensearch/search/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah +/server/src/main/java/org/opensearch/threadpool/ @jed326 @peternied /server/src/main/java/org/opensearch/transport/ @peternied -/.github/ @peternied +/server/src/test/java/org/opensearch/index/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah +/server/src/test/java/org/opensearch/search/ @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah + +/.github/ @jed326 @peternied /MAINTAINERS.md @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gaobinlong @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah From a918530397e3e70d5171d879b2238b3005e5c66f Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Wed, 7 Aug 2024 12:27:21 +0530 Subject: [PATCH 08/10] Add changes to build star tree in off heap (#14817) --------- Signed-off-by: Bharathwaj G --- .../index/mapper/StarTreeMapperIT.java | 4 +- .../common/util/ByteArrayBackedBitset.java | 86 +++ .../composite/Composite99DocValuesWriter.java | 8 +- .../aggregators/CountValueAggregator.java | 5 + .../aggregators/SumValueAggregator.java | 5 + .../startree/aggregators/ValueAggregator.java | 5 + .../builder/AbstractDocumentsFileManager.java | 231 ++++++++ .../startree/builder/BaseStarTreeBuilder.java | 48 +- .../builder/OffHeapStarTreeBuilder.java | 334 ++++++++++++ .../builder/OnHeapStarTreeBuilder.java | 24 +- .../builder/SegmentDocsFileManager.java | 103 ++++ .../builder/StarTreeDocsFileManager.java | 294 ++++++++++ .../startree/builder/StarTreesBuilder.java | 13 +- .../utils/StarTreeDocumentBitSetUtil.java | 57 ++ .../utils/StarTreeDocumentsSorter.java | 66 +++ .../datacube/startree/utils/TreeNode.java | 4 + .../index/mapper/StarTreeMapper.java | 3 +- .../builder/AbstractStarTreeBuilderTests.java | 513 ++++++++++++++++-- .../builder/OffHeapStarTreeBuilderTests.java | 26 + .../builder/StarTreesBuilderTests.java | 10 +- .../SequentialDocValuesIteratorTests.java | 2 - .../StarTreeDocumentBitSetUtilTests.java | 72 +++ .../utils/StarTreeDocumentsSorterTests.java | 201 +++++++ .../index/mapper/StarTreeMapperTests.java | 4 +- 24 files changed, 2028 insertions(+), 90 deletions(-) create mode 100644 
server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java index 8e5193b650868..1cabb8b617ce3 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java @@ -275,7 +275,7 @@ public void testValidCompositeIndex() { assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals( - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, + StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode() ); assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); @@ -359,7 +359,7 @@ public void testUpdateIndexWhenMappingIsSame() { assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics()); assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals( - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, + StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode() ); assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); diff --git a/server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java b/server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java new file mode 100644 index 0000000000000..2d7948d414937 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/util/ByteArrayBackedBitset.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.util; + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; + +import java.io.IOException; + +/** + * A bitset backed by a byte array. 
This will initialize and set bits in the byte array based on the index. + */ +public class ByteArrayBackedBitset { + private final byte[] byteArray; + + /** + * Constructor which uses an on heap list. This should be using during construction of the bitset. + */ + public ByteArrayBackedBitset(int capacity) { + byteArray = new byte[capacity]; + } + + /** + * Constructor which set the Lucene's RandomAccessInput to read the bitset into a read-only buffer. + */ + public ByteArrayBackedBitset(RandomAccessInput in, long offset, int length) throws IOException { + byteArray = new byte[length]; + int i = 0; + while (i < length) { + byteArray[i] = in.readByte(offset + i); + i++; + } + } + + /** + * Constructor which set the Lucene's IndexInput to read the bitset into a read-only buffer. + */ + public ByteArrayBackedBitset(IndexInput in, int length) throws IOException { + byteArray = new byte[length]; + int i = 0; + while (i < length) { + byteArray[i] = in.readByte(); + i++; + } + } + + /** + * Sets the bit at the given index to 1. + * Each byte can indicate 8 bits, so the index is divided by 8 to get the byte array index. + * @param index the index to set the bit + */ + public void set(int index) { + int byteArrIndex = index >> 3; + byteArray[byteArrIndex] |= (byte) (1 << (index & 7)); + } + + public int write(IndexOutput output) throws IOException { + int numBytes = 0; + for (Byte bitSet : byteArray) { + output.writeByte(bitSet); + numBytes += Byte.BYTES; + } + return numBytes; + } + + /** + * Retrieves whether the bit is set or not at the given index. + * @param index the index to look up for the bit + * @return true if bit is set, false otherwise + */ + public boolean get(int index) throws IOException { + int byteArrIndex = index >> 3; + return (byteArray[byteArrIndex] & (1 << (index & 7))) != 0; + } + + public int getCurrBytesRead() { + return byteArray.length; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java index 3859d3c998573..6ed1a8c42e380 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java @@ -8,8 +8,6 @@ package org.opensearch.index.codec.composite; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; @@ -50,9 +48,9 @@ public class Composite99DocValuesWriter extends DocValuesConsumer { private final Set compositeMappedFieldTypes; private final Set compositeFieldSet; private final Set segmentFieldSet; + private final boolean segmentHasCompositeFields; private final Map fieldProducerMap = new HashMap<>(); - private static final Logger logger = LogManager.getLogger(Composite99DocValuesWriter.class); public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) { @@ -70,6 +68,8 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState for (CompositeMappedFieldType type : compositeMappedFieldTypes) { compositeFieldSet.addAll(type.fields()); } + // check if there are any composite fields which are part of the segment + segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false; } @Override @@ -91,7 +91,7 
@@ public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) th public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { delegate.addSortedNumericField(field, valuesProducer); // Perform this only during flush flow - if (mergeState.get() == null) { + if (mergeState.get() == null && segmentHasCompositeFields) { createCompositeIndicesIfPossible(valuesProducer, field); } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index 5390b6728b9b6..ed159ee2efb7b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -68,4 +68,9 @@ public Long toLongValue(Long value) { public Long toStarTreeNumericTypeValue(Long value) { return value; } + + @Override + public Long getIdentityMetricValue() { + return 0L; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index 385549216e4d6..a471f0e2bd960 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -103,4 +103,9 @@ public Double toStarTreeNumericTypeValue(Long value) { throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); } } + + @Override + public Double getIdentityMetricValue() { + return 0D; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index 93230ed012b13..048582cc530e5 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -61,4 +61,9 @@ public interface ValueAggregator { * Converts an aggregated value from a Long type. */ A toStarTreeNumericTypeValue(Long rawValue); + + /** + * Fetches a value that does not alter the result of aggregations + */ + A getIdentityMetricValue(); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java new file mode 100644 index 0000000000000..78c49dbada6b2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java @@ -0,0 +1,231 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.store.TrackingDirectoryWrapper; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericTypeConverters; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentBitSetUtil; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +/** + * Abstract class for managing star tree file operations. + * + * @opensearch.experimental + */ +@ExperimentalApi +public abstract class AbstractDocumentsFileManager implements Closeable { + private static final Logger logger = LogManager.getLogger(AbstractDocumentsFileManager.class); + protected final StarTreeField starTreeField; + protected final List metricAggregatorInfos; + protected final int numMetrics; + protected final TrackingDirectoryWrapper tmpDirectory; + protected final SegmentWriteState state; + protected int docSizeInBytes = -1; + + public AbstractDocumentsFileManager( + SegmentWriteState state, + StarTreeField starTreeField, + List metricAggregatorInfos + ) { + this.starTreeField = starTreeField; + this.tmpDirectory = new TrackingDirectoryWrapper(state.directory); + this.metricAggregatorInfos = metricAggregatorInfos; + this.state = state; + numMetrics = metricAggregatorInfos.size(); + } + + private void setDocSizeInBytes(int numBytes) { + if (docSizeInBytes == -1) { + docSizeInBytes = numBytes; + } + assert docSizeInBytes == numBytes; + } + + /** + * Write the star tree document to file associated with dimensions and metrics + */ + protected int writeStarTreeDocument(StarTreeDocument starTreeDocument, IndexOutput output, boolean isAggregatedDoc) throws IOException { + int numBytes = writeDimensions(starTreeDocument, output); + numBytes += writeMetrics(starTreeDocument, output, isAggregatedDoc); + setDocSizeInBytes(numBytes); + return numBytes; + } + + /** + * Write dimensions to file + */ + protected int writeDimensions(StarTreeDocument starTreeDocument, IndexOutput output) throws IOException { + int numBytes = 0; + for (int i = 0; i < starTreeDocument.dimensions.length; i++) { + output.writeLong(starTreeDocument.dimensions[i] == null ? 0L : starTreeDocument.dimensions[i]); + numBytes += Long.BYTES; + } + numBytes += StarTreeDocumentBitSetUtil.writeBitSet(starTreeDocument.dimensions, output); + return numBytes; + } + + /** + * Write star tree document metrics to file + */ + protected int writeMetrics(StarTreeDocument starTreeDocument, IndexOutput output, boolean isAggregatedDoc) throws IOException { + int numBytes = 0; + for (int i = 0; i < starTreeDocument.metrics.length; i++) { + switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { + case LONG: + output.writeLong(starTreeDocument.metrics[i] == null ? 
0L : (Long) starTreeDocument.metrics[i]); + numBytes += Long.BYTES; + break; + case DOUBLE: + if (isAggregatedDoc) { + long val = NumericUtils.doubleToSortableLong( + starTreeDocument.metrics[i] == null ? 0.0 : (Double) starTreeDocument.metrics[i] + ); + output.writeLong(val); + } else { + output.writeLong(starTreeDocument.metrics[i] == null ? 0L : (Long) starTreeDocument.metrics[i]); + } + numBytes += Long.BYTES; + break; + default: + throw new IllegalStateException("Unsupported metric type"); + } + } + numBytes += StarTreeDocumentBitSetUtil.writeBitSet(starTreeDocument.metrics, output); + return numBytes; + } + + /** + * Reads the star tree document from file with given offset + * + * @param input RandomAccessInput + * @param offset Offset in the file + * @param isAggregatedDoc boolean to indicate if aggregated star tree docs should be read + * @return StarTreeDocument + * @throws IOException IOException in case of I/O errors + */ + protected StarTreeDocument readStarTreeDocument(RandomAccessInput input, long offset, boolean isAggregatedDoc) throws IOException { + int dimSize = starTreeField.getDimensionsOrder().size(); + Long[] dimensions = new Long[dimSize]; + long initialOffset = offset; + offset = readDimensions(dimensions, input, offset); + + Object[] metrics = new Object[numMetrics]; + offset = readMetrics(input, offset, numMetrics, metrics, isAggregatedDoc); + assert (offset - initialOffset) == docSizeInBytes; + return new StarTreeDocument(dimensions, metrics); + } + + /** + * Read dimensions from file + */ + protected long readDimensions(Long[] dimensions, RandomAccessInput input, long offset) throws IOException { + for (int i = 0; i < dimensions.length; i++) { + try { + dimensions[i] = input.readLong(offset); + } catch (Exception e) { + logger.error("Error reading dimension value at offset {} for dimension {}", offset, i); + throw e; + } + offset += Long.BYTES; + } + offset += StarTreeDocumentBitSetUtil.readBitSet(input, offset, dimensions, index -> null); + return offset; + } + + /** + * Read star tree metrics from file + */ + protected long readMetrics(RandomAccessInput input, long offset, int numMetrics, Object[] metrics, boolean isAggregatedDoc) + throws IOException { + for (int i = 0; i < numMetrics; i++) { + switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { + case LONG: + metrics[i] = input.readLong(offset); + offset += Long.BYTES; + break; + case DOUBLE: + long val = input.readLong(offset); + if (isAggregatedDoc) { + metrics[i] = StarTreeNumericTypeConverters.sortableLongtoDouble(val); + } else { + metrics[i] = val; + } + offset += Long.BYTES; + break; + default: + throw new IllegalStateException("Unsupported metric type"); + } + } + offset += StarTreeDocumentBitSetUtil.readBitSet( + input, + offset, + metrics, + index -> metricAggregatorInfos.get(index).getValueAggregators().getIdentityMetricValue() + ); + return offset; + } + + /** + * Write star tree document to file + */ + public abstract void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException; + + /** + * Read star tree document from file based on doc id + */ + public abstract StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException; + + /** + * Read star document dimensions from file based on doc id + */ + public abstract Long[] readDimensions(int docId) throws IOException; + + /** + * Read dimension value for given doc id and dimension id + */ + public abstract Long getDimensionValue(int docId, 
int dimensionId) throws IOException; + + /** + * Delete the temporary files created + */ + public void deleteFiles(boolean success) throws IOException { + if (success) { + for (String file : tmpDirectory.getCreatedFiles()) { + tmpDirectory.deleteFile(file); + } + } else { + deleteFilesIgnoringException(); + } + + } + + /** + * Delete the temporary files created + */ + private void deleteFilesIgnoringException() throws IOException { + for (String file : tmpDirectory.getCreatedFiles()) { + try { + tmpDirectory.deleteFile(file); + } catch (final IOException ignored) {} // similar to IOUtils.deleteFilesWhileIgnoringExceptions + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 7187fade882ea..56bb46e83a9da 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -72,8 +72,7 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected final TreeNode rootNode = getNewNode(); - private final StarTreeField starTreeField; - private final MapperService mapperService; + protected final StarTreeField starTreeField; private final SegmentWriteState state; static String NUM_SEGMENT_DOCS = "numSegmentDocs"; @@ -95,7 +94,6 @@ protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState sta this.skipStarNodeCreationForDimensions = new HashSet<>(); this.totalSegmentDocs = state.segmentInfo.maxDoc(); - this.mapperService = mapperService; this.state = state; Set skipStarNodeCreationForDimensions = starTreeFieldSpec.getSkipStarNodeCreationInDims(); @@ -141,6 +139,37 @@ public List generateMetricAggregatorInfos(MapperService ma return metricAggregatorInfos; } + /** + * Get star tree document from the segment for the current docId with the dimensionReaders and metricReaders + */ + protected StarTreeDocument getStarTreeDocument( + int currentDocId, + SequentialDocValuesIterator[] dimensionReaders, + List metricReaders + ) throws IOException { + Long[] dims = new Long[numDimensions]; + int i = 0; + for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { + dimensionDocValueIterator.nextDoc(currentDocId); + Long val = dimensionDocValueIterator.value(currentDocId); + dims[i] = val; + i++; + } + i = 0; + Object[] metrics = new Object[metricReaders.size()]; + for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { + metricDocValuesIterator.nextDoc(currentDocId); + // As part of merge, we traverse the star tree doc values + // The type of data stored in metric fields is different from the + // actual indexing field they're based on + metrics[i] = metricAggregatorInfos.get(i) + .getValueAggregators() + .toStarTreeNumericTypeValue(metricDocValuesIterator.value(currentDocId)); + i++; + } + return new StarTreeDocument(dims, metrics); + } + /** * Adds a document to the star-tree. * @@ -163,7 +192,7 @@ public List generateMetricAggregatorInfos(MapperService ma * * @return Star tree documents */ - public abstract List getStarTreeDocuments(); + public abstract List getStarTreeDocuments() throws IOException; /** * Returns the value of the dimension for the given dimension id and document in the star-tree. 
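Worth pausing on the on-disk layout used by the file managers earlier in this patch: AbstractDocumentsFileManager writes every document at a fixed width by emitting a 0L placeholder for each null dimension or metric and then appending a ByteArrayBackedBitset that marks which slots were null; readers consult that trailing bitset to restore nulls (or an aggregator's identity metric value). The fixed record width is presumably what lets the doc-file managers seek straight to a document by id during the off-heap sort. A self-contained sketch of the null-tracking pattern (plain Java; NullBitsetSketch is a hypothetical name, and the bit addressing copies ByteArrayBackedBitset's index >> 3 and 1 << (index & 7) scheme):

import java.nio.ByteBuffer;

final class NullBitsetSketch {
    // Write each value as a long (0L when null), then one byte per 8 slots with
    // bit i set when slot i was null -- mirroring writeDimensions(...).
    static byte[] write(Long[] values) {
        int bitsetBytes = (values.length + 7) / 8;
        ByteBuffer buf = ByteBuffer.allocate(values.length * Long.BYTES + bitsetBytes);
        byte[] bitset = new byte[bitsetBytes];
        for (int i = 0; i < values.length; i++) {
            buf.putLong(values[i] == null ? 0L : values[i]);
            if (values[i] == null) {
                bitset[i >> 3] |= (byte) (1 << (i & 7)); // same addressing as ByteArrayBackedBitset#set
            }
        }
        buf.put(bitset);
        return buf.array();
    }

    // Read the longs back, then use the trailing bitset to restore null slots,
    // mirroring readDimensions(...), whose identity value for dimensions is null.
    static Long[] read(byte[] bytes, int count) {
        ByteBuffer buf = ByteBuffer.wrap(bytes);
        Long[] values = new Long[count];
        for (int i = 0; i < count; i++) {
            values[i] = buf.getLong();
        }
        byte[] bitset = new byte[(count + 7) / 8];
        buf.get(bitset);
        for (int i = 0; i < count; i++) {
            if ((bitset[i >> 3] & (1 << (i & 7))) != 0) {
                values[i] = null; // slot was null when written
            }
        }
        return values;
    }
}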
@@ -330,8 +359,13 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( * @return converted metric value to long */ private static long getLong(Object metric) { - Long metricValue = null; + // TODO : remove this after we merge identity changes + if (metric instanceof Double) { + if (0D == (double) metric) { + return 0L; + } + } try { if (metric instanceof Long) { metricValue = (long) metric; @@ -709,4 +743,8 @@ public void close() throws IOException { } abstract Iterator mergeStarTrees(List starTreeValues) throws IOException; + + public TreeNode getRootNode() { + return rootNode; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java new file mode 100644 index 0000000000000..f63b0cb0cc77d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -0,0 +1,334 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.search.DocIdSetIterator; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentsSorter; +import org.opensearch.index.mapper.MapperService; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Off-heap implementation of the star tree builder. + * @opensearch.experimental + */ +@ExperimentalApi +public class OffHeapStarTreeBuilder extends BaseStarTreeBuilder { + private static final Logger logger = LogManager.getLogger(OffHeapStarTreeBuilder.class); + private final StarTreeDocsFileManager starTreeDocumentFileManager; + private final SegmentDocsFileManager segmentDocumentFileManager; + + /** + * Builds star tree based on star tree field configuration consisting of dimensions, metrics and star tree index + * specific configuration. 
+ * + * @param starTreeField holds the configuration for the star tree + * @param state stores the segment write state + * @param mapperService helps to find the original type of the field + */ + protected OffHeapStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) throws IOException { + super(starTreeField, state, mapperService); + segmentDocumentFileManager = new SegmentDocsFileManager(state, starTreeField, metricAggregatorInfos); + try { + starTreeDocumentFileManager = new StarTreeDocsFileManager(state, starTreeField, metricAggregatorInfos); + } catch (IOException e) { + IOUtils.closeWhileHandlingException(segmentDocumentFileManager); + throw e; + } + + } + + @Override + public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException { + starTreeDocumentFileManager.writeStarTreeDocument(starTreeDocument, true); + } + + /** + * Builds star tree based on the star tree values from multiple segments + * + * @param starTreeValuesSubs contains the star tree values from multiple segments + */ + @Override + public void build(List starTreeValuesSubs) throws IOException { + boolean success = false; + try { + build(mergeStarTrees(starTreeValuesSubs)); + success = true; + } finally { + starTreeDocumentFileManager.deleteFiles(success); + segmentDocumentFileManager.deleteFiles(success); + } + } + + /** + * Sorts and aggregates all the documents of the segment based on dimension and metrics configuration + * + * @param dimensionReaders List of docValues readers to read dimensions from the segment + * @param metricReaders List of docValues readers to read metrics from the segment + * @return Iterator of star-tree documents + */ + @Override + public Iterator sortAndAggregateSegmentDocuments( + SequentialDocValuesIterator[] dimensionReaders, + List metricReaders + ) throws IOException { + // Write all dimensions for segment documents into the buffer, + // and sort all documents using an int array + int[] sortedDocIds = new int[totalSegmentDocs]; + for (int i = 0; i < totalSegmentDocs; i++) { + sortedDocIds[i] = i; + } + try { + for (int i = 0; i < totalSegmentDocs; i++) { + StarTreeDocument document = getSegmentStarTreeDocument(i, dimensionReaders, metricReaders); + segmentDocumentFileManager.writeStarTreeDocument(document, false); + } + } catch (IOException ex) { + segmentDocumentFileManager.close(); + throw ex; + } + // Create an iterator for aggregated documents + return sortAndReduceDocuments(sortedDocIds, totalSegmentDocs, false); + } + + /** + * Sorts and aggregates the star-tree documents from multiple segments and builds star tree based on the newly + * aggregated star-tree documents + * + * @param starTreeValuesSubs StarTreeValues from multiple segments + * @return iterator of star tree documents + */ + Iterator mergeStarTrees(List starTreeValuesSubs) throws IOException { + int numDocs = 0; + int[] docIds; + try { + for (StarTreeValues starTreeValues : starTreeValuesSubs) { + List dimensionsSplitOrder = starTreeValues.getStarTreeField().getDimensionsOrder(); + SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[starTreeValues.getStarTreeField() + .getDimensionsOrder() + .size()]; + for (int i = 0; i < dimensionsSplitOrder.size(); i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocValuesIteratorMap().get(dimension)); + } + List metricReaders = new ArrayList<>(); + for (Map.Entry 
metricDocValuesEntry : starTreeValues.getMetricDocValuesIteratorMap().entrySet()) { + metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); + } + int currentDocId = 0; + int numSegmentDocs = Integer.parseInt( + starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) + ); + while (currentDocId < numSegmentDocs) { + StarTreeDocument starTreeDocument = getStarTreeDocument(currentDocId, dimensionReaders, metricReaders); + segmentDocumentFileManager.writeStarTreeDocument(starTreeDocument, true); + numDocs++; + currentDocId++; + } + } + docIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + docIds[i] = i; + } + } catch (IOException ex) { + segmentDocumentFileManager.close(); + throw ex; + } + + if (numDocs == 0) { + return Collections.emptyIterator(); + } + + return sortAndReduceDocuments(docIds, numDocs, true); + } + + /** + * Sorts and reduces the star tree documents based on the dimensions + */ + private Iterator sortAndReduceDocuments(int[] sortedDocIds, int numDocs, boolean isMerge) throws IOException { + try { + if (sortedDocIds == null || sortedDocIds.length == 0) { + logger.debug("Sorted doc ids array is null"); + return Collections.emptyIterator(); + } + try { + StarTreeDocumentsSorter.sort(sortedDocIds, -1, numDocs, index -> { + try { + return segmentDocumentFileManager.readDimensions(sortedDocIds[index]); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } catch (UncheckedIOException ex) { + // Unwrap UncheckedIOException and throw as IOException + if (ex.getCause() != null) { + throw ex.getCause(); + } + throw ex; + } + final StarTreeDocument currentDocument = segmentDocumentFileManager.readStarTreeDocument(sortedDocIds[0], isMerge); + // Create an iterator for aggregated documents + return new Iterator() { + StarTreeDocument tempCurrentDocument = currentDocument; + boolean hasNext = true; + int docId = 1; + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public StarTreeDocument next() { + StarTreeDocument next = reduceSegmentStarTreeDocuments(null, tempCurrentDocument, isMerge); + while (docId < numDocs) { + StarTreeDocument doc; + try { + doc = segmentDocumentFileManager.readStarTreeDocument(sortedDocIds[docId++], isMerge); + } catch (IOException e) { + throw new RuntimeException("Reducing documents failed ", e); + } + if (!Arrays.equals(doc.dimensions, next.dimensions)) { + tempCurrentDocument = doc; + return next; + } else { + next = reduceSegmentStarTreeDocuments(next, doc, isMerge); + } + } + hasNext = false; + try { + segmentDocumentFileManager.close(); + } catch (IOException ex) { + logger.error("Closing segment documents file failed", ex); + } + return next; + } + }; + } catch (IOException ex) { + IOUtils.closeWhileHandlingException(segmentDocumentFileManager); + throw ex; + } + } + + /** + * Get star tree document for the given docId from the star-tree.documents file + */ + @Override + public StarTreeDocument getStarTreeDocument(int docId) throws IOException { + return starTreeDocumentFileManager.readStarTreeDocument(docId, true); + } + + // This should be only used for testing + @Override + public List getStarTreeDocuments() throws IOException { + List starTreeDocuments = new ArrayList<>(); + for (int i = 0; i < numStarTreeDocs; i++) { + starTreeDocuments.add(getStarTreeDocument(i)); + } + return starTreeDocuments; + } + + @Override + public Long getDimensionValue(int docId, int dimensionId) throws IOException { + return 
starTreeDocumentFileManager.getDimensionValue(docId, dimensionId); + } + + /** + * Generates a star-tree for a given star-node + * + * @param startDocId Start document id in the star-tree + * @param endDocId End document id (exclusive) in the star-tree + * @param dimensionId Dimension id of the star-node + * @return iterator for star-tree documents of star-node + * @throws IOException throws when unable to generate star-tree for star-node + */ + @Override + public Iterator generateStarTreeDocumentsForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException { + // Sort all documents using an int array + int numDocs = endDocId - startDocId; + int[] sortedDocIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = startDocId + i; + } + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, index -> { + try { + return starTreeDocumentFileManager.readDimensions(sortedDocIds[index]); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + // Create an iterator for aggregated documents + return new Iterator() { + boolean hasNext = true; + StarTreeDocument currentDocument = getStarTreeDocument(sortedDocIds[0]); + int docId = 1; + + private boolean hasSameDimensions(StarTreeDocument document1, StarTreeDocument document2) { + for (int i = dimensionId + 1; i < starTreeField.getDimensionsOrder().size(); i++) { + if (!Objects.equals(document1.dimensions[i], document2.dimensions[i])) { + return false; + } + } + return true; + } + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public StarTreeDocument next() { + StarTreeDocument next = reduceStarTreeDocuments(null, currentDocument); + next.dimensions[dimensionId] = STAR_IN_DOC_VALUES_INDEX; + while (docId < numDocs) { + StarTreeDocument document; + try { + document = getStarTreeDocument(sortedDocIds[docId++]); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (!hasSameDimensions(document, currentDocument)) { + currentDocument = document; + return next; + } else { + next = reduceStarTreeDocuments(next, document); + } + } + hasNext = false; + return next; + } + }; + } + + @Override + public void close() throws IOException { + IOUtils.closeWhileHandlingException(starTreeDocumentFileManager, segmentDocumentFileManager); + super.close(); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index 1599be2e76a56..8ff111d3b41d9 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -127,34 +127,12 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); } - boolean endOfDoc = false; int currentDocId = 0; int numSegmentDocs = Integer.parseInt( starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) ); while (currentDocId < numSegmentDocs) { - Long[] dims = new Long[dimensionsSplitOrder.size()]; - int i = 0; - for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { - dimensionDocValueIterator.nextDoc(currentDocId); - Long val = dimensionDocValueIterator.value(currentDocId); - dims[i] = val; - i++; - } - i 
= 0; - Object[] metrics = new Object[metricReaders.size()]; - for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { - metricDocValuesIterator.nextDoc(currentDocId); - // As part of merge, we traverse the star tree doc values - // The type of data stored in metric fields is different from the - // actual indexing field they're based on - metrics[i] = metricAggregatorInfos.get(i) - .getValueAggregators() - .toStarTreeNumericTypeValue(metricDocValuesIterator.value(currentDocId)); - i++; - } - StarTreeDocument starTreeDocument = new StarTreeDocument(dims, metrics); - starTreeDocuments.add(starTreeDocument); + starTreeDocuments.add(getStarTreeDocument(currentDocId, dimensionReaders, metricReaders)); currentDocId++; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java new file mode 100644 index 0000000000000..fe94df57d9535 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SegmentDocsFileManager.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +/** + * Class for managing segment documents file. + * Segment documents are stored in a single file named 'segment.documents' for sorting and aggregation. A document ID array is created, + * and the document IDs in the array are swapped during sorting based on the actual segment document values in the file. 
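+ * <p>
+ * As an illustration only (not the actual implementation), the indirection behaves like sorting an id array
+ * by the values those ids point to, while the documents themselves stay where they were written:
+ * <pre>{@code
+ * Integer[] docIds = {0, 1, 2, 3};
+ * long[] firstDimension = {42L, 7L, 19L, 7L}; // dimension values read back from 'segment.documents'
+ * java.util.Arrays.sort(docIds, java.util.Comparator.comparingLong(id -> firstDimension[id]));
+ * // docIds is now [1, 3, 2, 0]; only the id array moved, the file contents are untouched
+ * }</pre>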
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class SegmentDocsFileManager extends AbstractDocumentsFileManager implements Closeable { + + private static final Logger logger = LogManager.getLogger(SegmentDocsFileManager.class); + private static final String SEGMENT_DOC_FILE_NAME = "segment.documents"; + private IndexInput segmentDocsFileInput; + private RandomAccessInput segmentRandomInput; + final IndexOutput segmentDocsFileOutput; + + public SegmentDocsFileManager(SegmentWriteState state, StarTreeField starTreeField, List metricAggregatorInfos) + throws IOException { + super(state, starTreeField, metricAggregatorInfos); + try { + segmentDocsFileOutput = tmpDirectory.createTempOutput(SEGMENT_DOC_FILE_NAME, state.segmentSuffix, state.context); + } catch (IOException e) { + IOUtils.closeWhileHandlingException(this); + throw e; + } + } + + @Override + public void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException { + writeStarTreeDocument(starTreeDocument, segmentDocsFileOutput, isAggregatedDoc); + } + + private void maybeInitializeSegmentInput() throws IOException { + try { + if (segmentDocsFileInput == null) { + IOUtils.closeWhileHandlingException(segmentDocsFileOutput); + segmentDocsFileInput = tmpDirectory.openInput(segmentDocsFileOutput.getName(), state.context); + segmentRandomInput = segmentDocsFileInput.randomAccessSlice(0, segmentDocsFileInput.length()); + } + } catch (IOException e) { + IOUtils.closeWhileHandlingException(this); + throw e; + } + } + + @Override + public StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException { + maybeInitializeSegmentInput(); + return readStarTreeDocument(segmentRandomInput, (long) docId * docSizeInBytes, isAggregatedDoc); + } + + @Override + public Long[] readDimensions(int docId) throws IOException { + maybeInitializeSegmentInput(); + Long[] dims = new Long[starTreeField.getDimensionsOrder().size()]; + readDimensions(dims, segmentRandomInput, (long) docId * docSizeInBytes); + return dims; + } + + @Override + public Long getDimensionValue(int docId, int dimensionId) throws IOException { + Long[] dims = readDimensions(docId); + return dims[dimensionId]; + } + + @Override + public void close() throws IOException { + try { + if (this.segmentDocsFileOutput != null) { + IOUtils.closeWhileHandlingException(segmentDocsFileOutput); + tmpDirectory.deleteFile(segmentDocsFileOutput.getName()); + } + } finally { + IOUtils.closeWhileHandlingException(segmentDocsFileInput, segmentDocsFileOutput); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java new file mode 100644 index 0000000000000..779ed77b0540a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocsFileManager.java @@ -0,0 +1,294 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Star tree document file manager. + * This class manages all the temporary files associated with off heap star tree builder. + *
+ * Star tree documents are stored in multiple 'star-tree.documents' files. The algorithm works as follows: + *
+ * <ol>
+ *   <li>Initially, aggregated documents are created based on the segment documents.</li>
+ *   <li>Further, star tree documents are generated (e.g., in the {@code generateStarTreeDocumentsForStarNode} method) by reading the current
+ * aggregated documents and creating new aggregated star tree documents, which are appended to the 'star-tree.documents' files
+ * (see the example after this list).</li>
+ *   <li>This process is repeated until all combinations of star tree documents are generated.</li>
+ * </ol>
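+ * <p>
+ * For example, with dimension order (d1, d2), the aggregated documents [1, 2] and [3, 2] yield the star tree
+ * document [*, 2], whose metrics aggregate those of both source documents, and [*, 2] is appended to the file.
+ * <p>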
In cases where previously written star tree documents need to be read from the 'star-tree.documents' files, the current + * 'star-tree.documents' file is closed, and the values are read. Then, the derived values get appended to a new 'star-tree.documents' file. + * This is necessary because Lucene maintains immutability of data, and an {@code IndexOutput} cannot be kept open while creating an + * {@code IndexInput} on the same file, as all file contents may not be visible in the reader. Therefore, the {@code IndexOutput} must be + * closed to ensure all data can be read before creating an {@code IndexInput}. Additionally, an {@code IndexOutput} cannot be reopened, + * so a new file is created for the new star tree documents. + *
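+ * <p>
+ * A minimal sketch of that rollover, assuming a Lucene {@code Directory} named {@code dir} (illustrative, not the builder's actual code):
+ * <pre>{@code
+ * IndexOutput out = dir.createTempOutput("star-tree.documents", "tmp", IOContext.DEFAULT);
+ * out.writeLong(42L);
+ * out.close(); // the output must be closed before its contents can be read
+ * IndexInput in = dir.openInput(out.getName(), IOContext.DEFAULT);
+ * long value = in.readLong(); // the written data is now fully visible
+ * IndexOutput next = dir.createTempOutput("star-tree.documents", "tmp", IOContext.DEFAULT);
+ * // subsequent appends go to 'next'; 'out' cannot be reopened
+ * }</pre>
+ * <p>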
The set of 'star-tree.documents' files is maintained, and a tracker array is used to keep track of the start document ID for each file. + * Once the number of files reaches a set threshold, the files are merged. + * + */ +public class StarTreeDocsFileManager extends AbstractDocumentsFileManager implements Closeable { + private static final Logger logger = LogManager.getLogger(StarTreeDocsFileManager.class); + private static final String STAR_TREE_DOC_FILE_NAME = "star-tree.documents"; + public static final int DEFAULT_FILE_COUNT_MERGE_THRESHOLD = 5; + private IndexInput starTreeDocsFileInput; + private RandomAccessInput starTreeDocsFileRandomInput; + private IndexOutput starTreeDocsFileOutput; + private final Map fileToEndDocIdMap; + private final List starTreeDocumentOffsets = new ArrayList<>(); + private int currentFileStartDocId; + private int numReadableStarTreeDocuments; + private int starTreeFileCount = -1; + private int currBytes = 0; + private final int fileCountMergeThreshold; + private int numStarTreeDocs = 0; + + public StarTreeDocsFileManager(SegmentWriteState state, StarTreeField starTreeField, List metricAggregatorInfos) + throws IOException { + this(state, starTreeField, metricAggregatorInfos, DEFAULT_FILE_COUNT_MERGE_THRESHOLD); + } + + public StarTreeDocsFileManager( + SegmentWriteState state, + StarTreeField starTreeField, + List metricAggregatorInfos, + int fileCountThreshold + ) throws IOException { + super(state, starTreeField, metricAggregatorInfos); + fileToEndDocIdMap = new LinkedHashMap<>(); + try { + starTreeDocsFileOutput = createStarTreeDocumentsFileOutput(); + } catch (IOException e) { + IOUtils.closeWhileHandlingException(starTreeDocsFileOutput); + IOUtils.closeWhileHandlingException(this); + throw e; + } + fileCountMergeThreshold = fileCountThreshold; + } + + /** + * Creates a new star tree document temporary file to store star tree documents. 
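+ * Each call increments the internal file counter, so successive outputs are created with the prefixes
+ * 'star-tree.documents0', 'star-tree.documents1', and so on.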
+ */ + IndexOutput createStarTreeDocumentsFileOutput() throws IOException { + starTreeFileCount++; + return tmpDirectory.createTempOutput(STAR_TREE_DOC_FILE_NAME + starTreeFileCount, state.segmentSuffix, state.context); + } + + @Override + public void writeStarTreeDocument(StarTreeDocument starTreeDocument, boolean isAggregatedDoc) throws IOException { + assert isAggregatedDoc == true; + int numBytes = writeStarTreeDocument(starTreeDocument, starTreeDocsFileOutput, true); + addStarTreeDocumentOffset(numBytes); + numStarTreeDocs++; + } + + @Override + public StarTreeDocument readStarTreeDocument(int docId, boolean isAggregatedDoc) throws IOException { + assert isAggregatedDoc == true; + ensureDocumentReadable(docId); + return readStarTreeDocument(starTreeDocsFileRandomInput, starTreeDocumentOffsets.get(docId), true); + } + + @Override + public Long getDimensionValue(int docId, int dimensionId) throws IOException { + Long[] dims = readDimensions(docId); + return dims[dimensionId]; + } + + @Override + public Long[] readDimensions(int docId) throws IOException { + ensureDocumentReadable(docId); + Long[] dims = new Long[starTreeField.getDimensionsOrder().size()]; + readDimensions(dims, starTreeDocsFileRandomInput, starTreeDocumentOffsets.get(docId)); + return dims; + } + + private void addStarTreeDocumentOffset(int bytes) { + starTreeDocumentOffsets.add(currBytes); + currBytes += bytes; + if (docSizeInBytes == -1) { + docSizeInBytes = bytes; + } + assert docSizeInBytes == bytes; + } + + /** + * Load the correct StarTreeDocuments file based on the docId + */ + private void ensureDocumentReadable(int docId) throws IOException { + ensureDocumentReadable(docId, true); + } + + /** + * Load the correct StarTreeDocuments file based on the docId + * "currentFileStartDocId" and "numReadableStarTreeDocuments" track the "start doc id" and "end doc id + 1" + * of the range in the current open 'star-tree.documents' file + */ + private void ensureDocumentReadable(int docId, boolean shouldCreateFileOutput) throws IOException { + try { + if (docId >= currentFileStartDocId && docId < numReadableStarTreeDocuments) { + return; + } + IOUtils.closeWhileHandlingException(starTreeDocsFileInput); + starTreeDocsFileInput = null; + if (docId < numStarTreeDocs) { + loadStarTreeDocumentFile(docId); + } + if (starTreeDocsFileInput != null) { + return; + } + closeAndMaybeCreateNewFile(shouldCreateFileOutput, numStarTreeDocs); + loadStarTreeDocumentFile(docId); + } catch (IOException ex) { + IOUtils.closeWhileHandlingException(this); + throw ex; + } + } + + /** + * The fileToEndDocIdMap is in the following format + * file1 == 521 [ contains docs from 0 to 520 ] + * file2 == 780 [ contains docs from 521 to 779 ] + *
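+ * For example, a lookup for docId 600 resolves to file2: 600 is not less than 521, so file1 is skipped,
+ * and 600 < 780 selects file2; "currentFileStartDocId" becomes 521 and "numReadableStarTreeDocuments" becomes 780.
+ *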
+ * This method loads the correct 'star-tree.documents' file based on the docId + * and updates the "currentFileStartDocId" and "numReadableStarTreeDocuments" + */ + private void loadStarTreeDocumentFile(int docId) throws IOException { + int currentFileStartDocId = 0; + for (Map.Entry entry : fileToEndDocIdMap.entrySet()) { + if (docId < entry.getValue()) { + starTreeDocsFileInput = tmpDirectory.openInput(entry.getKey(), state.context); + starTreeDocsFileRandomInput = starTreeDocsFileInput.randomAccessSlice( + starTreeDocsFileInput.getFilePointer(), + starTreeDocsFileInput.length() - starTreeDocsFileInput.getFilePointer() + ); + numReadableStarTreeDocuments = entry.getValue(); + break; + } + currentFileStartDocId = entry.getValue(); + } + this.currentFileStartDocId = currentFileStartDocId; + } + + /** + * This case handles when the requested document ID is beyond the range of the currently open 'star-tree.documents' file. + * In this scenario, the following steps are taken: + *
1. Close the current 'star-tree.documents' file. + * 2. Create a new 'star-tree.documents' file if the operation involves appending new documents. + * If the operation is only for reading existing documents, a new file is not created. + */ + private void closeAndMaybeCreateNewFile(boolean shouldCreateFileForAppend, int numStarTreeDocs) throws IOException { + currBytes = 0; + if (starTreeDocsFileOutput != null) { + fileToEndDocIdMap.put(starTreeDocsFileOutput.getName(), numStarTreeDocs); + IOUtils.close(starTreeDocsFileOutput); + } + if (shouldCreateFileForAppend) { + starTreeDocsFileOutput = createStarTreeDocumentsFileOutput(); + if (fileToEndDocIdMap.size() >= fileCountMergeThreshold) { + mergeFiles(numStarTreeDocs); + } + } + if (starTreeDocsFileRandomInput != null) { + starTreeDocsFileRandomInput = null; + } + } + + /** + * Merge temporary star tree files once the number of files reaches the threshold + */ + private void mergeFiles(int numStarTreeDocs) throws IOException { + long st = System.currentTimeMillis(); + try (IndexOutput mergedOutput = createStarTreeDocumentsFileOutput()) { + long mergeBytes = mergeFilesToOutput(mergedOutput); + logger.debug( + "Created merge file : {} in : {} ms with size of : {} KB", + starTreeDocsFileOutput.getName(), + System.currentTimeMillis() - st, + mergeBytes / 1024 + ); + + deleteOldFiles(); + fileToEndDocIdMap.clear(); + fileToEndDocIdMap.put(mergedOutput.getName(), numStarTreeDocs); + resetStarTreeDocumentOffsets(); + } + } + + /** + * Merge all files into a single IndexOutput + */ + private long mergeFilesToOutput(IndexOutput mergedOutput) throws IOException { + long mergeBytes = 0L; + for (Map.Entry entry : fileToEndDocIdMap.entrySet()) { + IndexInput input = tmpDirectory.openInput(entry.getKey(), state.context); + mergedOutput.copyBytes(input, input.length()); + mergeBytes += input.length(); + input.close(); + } + return mergeBytes; + } + + /** + * Delete the old star-tree.documents files + */ + private void deleteOldFiles() throws IOException { + for (String fileName : fileToEndDocIdMap.keySet()) { + tmpDirectory.deleteFile(fileName); + } + } + + /** + * Reset the star tree document offsets based on the merged file + */ + private void resetStarTreeDocumentOffsets() { + int curr = 0; + for (int i = 0; i < starTreeDocumentOffsets.size(); i++) { + starTreeDocumentOffsets.set(i, curr); + curr += docSizeInBytes; + } + } + + @Override + public void close() { + try { + if (starTreeDocsFileOutput != null) { + IOUtils.closeWhileHandlingException(starTreeDocsFileOutput); + try { + tmpDirectory.deleteFile(starTreeDocsFileOutput.getName()); + } catch (IOException ignored) {} // similar to IOUtils.deleteFilesIgnoringExceptions + } + } finally { + IOUtils.closeWhileHandlingException(starTreeDocsFileInput, starTreeDocsFileOutput); + } + // Delete all temporary star tree document files + for (String file : fileToEndDocIdMap.keySet()) { + try { + tmpDirectory.deleteFile(file); + } catch (IOException ignored) {} // similar to IOUtils.deleteFilesIgnoringExceptions + } + starTreeDocumentOffsets.clear(); + fileToEndDocIdMap.clear(); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java index 6c3d476aa3a55..3b376d7c34351 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java +++
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java @@ -75,7 +75,7 @@ public void build(Map fieldProducerMap) throws IOExce // Build all star-trees for (StarTreeField starTreeField : starTreeFields) { - try (StarTreeBuilder starTreeBuilder = getSingleTreeBuilder(starTreeField, state, mapperService)) { + try (StarTreeBuilder starTreeBuilder = getStarTreeBuilder(starTreeField, state, mapperService)) { starTreeBuilder.build(fieldProducerMap); } } @@ -102,9 +102,9 @@ public void buildDuringMerge(final Map> starTreeVal continue; } StarTreeField starTreeField = starTreeValuesList.get(0).getStarTreeField(); - StarTreeBuilder builder = getSingleTreeBuilder(starTreeField, state, mapperService); - builder.build(starTreeValuesList); - builder.close(); + try (StarTreeBuilder builder = getStarTreeBuilder(starTreeField, state, mapperService)) { + builder.build(starTreeValuesList); + } } logger.debug( "Took {} ms to merge {} star-trees with star-tree fields", @@ -116,14 +116,13 @@ public void buildDuringMerge(final Map> starTreeVal /** * Get star-tree builder based on build mode. */ - StarTreeBuilder getSingleTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) + StarTreeBuilder getStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) throws IOException { switch (starTreeField.getStarTreeConfig().getBuildMode()) { case ON_HEAP: return new OnHeapStarTreeBuilder(starTreeField, state, mapperService); case OFF_HEAP: - // TODO - // return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); + return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); default: throw new IllegalArgumentException( String.format( diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java new file mode 100644 index 0000000000000..a508e497adcdf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtil.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.common.util.ByteArrayBackedBitset; + +import java.io.IOException; +import java.util.function.Function; + +/** + * Helper class to read/write bitset for null values and identity values. + */ +public class StarTreeDocumentBitSetUtil { + /** + * Write bitset for null values. + * + * @param array array of objects + * @param output output stream + * @return number of bytes written + * @throws IOException if an I/O error occurs while writing to the output stream + */ + public static int writeBitSet(Object[] array, IndexOutput output) throws IOException { + ByteArrayBackedBitset bitset = new ByteArrayBackedBitset(getLength(array)); + for (int i = 0; i < array.length; i++) { + if (array[i] == null) { + bitset.set(i); + } + } + return bitset.write(output); + } + + /** + * Set identity values based on bitset. 
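+ * The backing bitset occupies one byte per 8 array elements, rounded up: for example, a 10-element array
+ * is read through a 2-byte bitset (10 / 8 = 1, plus one byte for the remaining 2 elements).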
+ */ + public static int readBitSet(RandomAccessInput input, long offset, Object[] array, Function identityValueSupplier) + throws IOException { + ByteArrayBackedBitset bitset = new ByteArrayBackedBitset(input, offset, getLength(array)); + for (int i = 0; i < array.length; i++) { + if (bitset.get(i)) { + array[i] = identityValueSupplier.apply(i); + } + } + return bitset.getCurrBytesRead(); + } + + private static int getLength(Object[] array) { + return (array.length / 8) + (array.length % 8 == 0 ? 0 : 1); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java new file mode 100644 index 0000000000000..7b1c63bc611ee --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java @@ -0,0 +1,66 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.util.IntroSorter; + +import java.util.Objects; +import java.util.function.IntFunction; + +/** + * Utility class for building star tree + */ +public class StarTreeDocumentsSorter { + /** + * Sort documents based on the dimension values off heap using intro sorter. + */ + public static void sort( + final int[] sortedDocIds, + final int dimensionId, + final int numDocs, + final IntFunction dimensionsReader + ) { + new IntroSorter() { + private Long[] dimensions; + + @Override + protected void swap(int i, int j) { + int temp = sortedDocIds[i]; + sortedDocIds[i] = sortedDocIds[j]; + sortedDocIds[j] = temp; + } + + @Override + protected void setPivot(int i) { + dimensions = dimensionsReader.apply(i); + } + + @Override + protected int comparePivot(int j) { + Long[] currentDimensions = dimensionsReader.apply(j); + for (int i = dimensionId + 1; i < dimensions.length; i++) { + Long dimension = currentDimensions[i]; + if (!Objects.equals(dimensions[i], dimension)) { + if (dimensions[i] == null && dimension == null) { + return 0; + } + if (dimension == null) { + return -1; + } + if (dimensions[i] == null) { + return 1; + } + return Long.compare(dimensions[i], dimension); + } + } + return 0; + } + }.sort(0, numDocs); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java index 5cf737c61ab2d..a5d59a2602633 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java @@ -62,4 +62,8 @@ public class TreeNode { * A map containing the child nodes of this star-tree node, keyed by their dimension id. 
*/ public Map children; + + public long getDimensionValue() { + return dimensionValue; + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index d2debe762e9be..d9539f9dc0c82 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -84,8 +84,7 @@ public static class Builder extends ParametrizedFieldMapper.Builder { List.of(XContentMapValues.nodeStringArrayValue(paramMap.getOrDefault(SKIP_STAR_NODE_IN_DIMS, new ArrayList()))) ); paramMap.remove(SKIP_STAR_NODE_IN_DIMS); - // TODO : change this to off heap once off heap gets implemented - StarTreeFieldConfiguration.StarTreeBuildMode buildMode = StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP; + StarTreeFieldConfiguration.StarTreeBuildMode buildMode = StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP; List dimensions = buildDimensions(name, paramMap, context); paramMap.remove(ORDERED_DIMENSIONS); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index 76a7875919a8b..131d7444ff91c 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -55,6 +55,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; @@ -355,19 +356,21 @@ public void test_sortAndAggregateStarTreeDocuments_nullMetricField() throws IOEx } } - public void test_sortAndAggregateStarTreeDocuments_nullDimensionField() throws IOException { + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/14813") + public void test_sortAndAggregateStarTreeDocuments_nullAndMinusOneInDimensionField() throws IOException { int noOfStarTreeDocuments = 5; StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; // Setting second metric iterator as empty sorted numeric , indicating a metric field is null starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Double[] { 12.0, null, randomDouble() }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, null, randomDouble() }); - starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, null, randomDouble() }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Double[] { 10.0, null, randomDouble() }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Double[] { 14.0, null, randomDouble() }); starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Double[] { 9.0, null, randomDouble() }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble() }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble() }); List inorderStarTreeDocuments = List.of( new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Object[] { 21.0, 0.0, 2L }), - new 
StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 0.0, 3L }) + new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 24.0, 0.0, 2L }), + new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Object[] { 11.0, 0.0, 1L }) ); Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); @@ -388,8 +391,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionField() throws I metricsIterators ); - while (segmentStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { - StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + for (StarTreeDocument resultStarTreeDocument : builder.getStarTreeDocuments()) { StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); @@ -399,6 +401,8 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionField() throws I assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); assertEquals(expectedStarTreeDocument.metrics[2], resultStarTreeDocument.metrics[2]); } + builder.build(segmentStarTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics() throws IOException { @@ -411,7 +415,9 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics( starTreeDocuments[3] = new StarTreeDocument(new Long[] { null, null, null, null }, new Double[] { null, null, null }); starTreeDocuments[4] = new StarTreeDocument(new Long[] { null, null, null, null }, new Double[] { null, null, null }); - List inorderStarTreeDocuments = List.of(); + List inorderStarTreeDocuments = List.of( + new StarTreeDocument(new Long[] { null, null, null, null }, new Object[] { 0.0, 0.0, 5L }) + ); Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; @@ -446,6 +452,8 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics( assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); assertEquals(expectedStarTreeDocument.metrics[2], resultStarTreeDocument.metrics[2]); } + builder.build(segmentStarTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } public void test_sortAndAggregateStarTreeDocuments_emptyDimensions() throws IOException { @@ -595,6 +603,8 @@ public void test_sortAndAggregateStarTreeDocument_DoubleMaxAndDoubleMinMetrics() } assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); + builder.build(segmentStarTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 3, 1, builder.getStarTreeDocuments()); } @@ -671,6 +681,7 @@ public void test_build_halfFloatMetrics() throws IOException { Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + builder.build(expectedStarTreeDocumentIterator); } public void test_build_floatMetrics() throws IOException { @@ -975,6 +986,7 @@ public void test_build_starTreeDataset() throws IOException { assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); 
assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); } + validateStarTree(builder.getRootNode(), 3, 1, builder.getStarTreeDocuments()); } private static Map> getExpectedDimToValueMap() { @@ -1055,7 +1067,7 @@ public void testFlushFlow() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimDvs, @@ -1081,6 +1093,62 @@ public void testFlushFlow() throws IOException { assertEquals(1L, starTreeDocument.metrics[1]); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testFlushFlowDimsReverse() throws IOException { + List dimList = List.of(5L, 4L, 3L, 2L, 1L); + List docsWithField = List.of(0, 1, 2, 3, 4); + List dimList2 = List.of(5L, 4L, 3L, 2L, 1L, 0L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5); + + List metricsList = List.of( + getLongFromDouble(50.0), + getLongFromDouble(40.0), + getLongFromDouble(30.0), + getLongFromDouble(20.0), + getLongFromDouble(10.0), + getLongFromDouble(0.0) + ); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5); + + StarTreeField sf = getStarTreeFieldWithMultipleMetrics(); + SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); + SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); + SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); + + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); + SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; + Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( + dimDvs, + List.of(new SequentialDocValuesIterator(m1sndv), new SequentialDocValuesIterator(m2sndv)) + ); + /** + * Asserting following dim / metrics [ dim1, dim2 / Sum [metric], count [metric] ] + [1, 1] | [10.0, 1] + [2, 2] | [20.0, 1] + [3, 3] | [30.0, 1] + [4, 4] | [40.0, 1] + [5, 5] | [50.0, 1] + [null, 0] | [0.0, 1] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (starTreeDocument.dimensions[0] != null) { + assertEquals(count, (long) starTreeDocument.dimensions[0]); + } else { + assertEquals(6, count); + } + assertEquals(starTreeDocument.dimensions[1] * 10.0, starTreeDocument.metrics[0]); + assertEquals(1L, starTreeDocument.metrics[1]); + } + assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testFlushFlowBuild() throws IOException { @@ -1120,7 +1188,7 @@ public void testFlushFlowBuild() throws IOException { SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - BaseStarTreeBuilder builder = 
getStarTreeBuilder(sf, getWriteState(100), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(100), mapperService); DocValuesProducer d1vp = getDocValuesProducer(d1sndv); DocValuesProducer d2vp = getDocValuesProducer(d2sndv); @@ -1147,7 +1215,7 @@ public void testFlushFlowBuild() throws IOException { starTreeDocument.metrics[0] ); } - builder.close(); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } private static DocValuesProducer getDocValuesProducer(SortedNumericDocValues sndv) { @@ -1209,7 +1277,7 @@ public void testMergeFlowWithSum() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] @@ -1232,6 +1300,8 @@ public void testMergeFlowWithSum() throws IOException { ); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithCount() throws IOException { @@ -1259,7 +1329,7 @@ public void testMergeFlowWithCount() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1279,6 +1349,9 @@ public void testMergeFlowWithCount() throws IOException { assertEquals(starTreeDocument.dimensions[0] != null ? 
starTreeDocument.dimensions[0] * 2 : 4, starTreeDocument.metrics[0]); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } private StarTreeValues getStarTreeValues( @@ -1336,7 +1409,7 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1361,6 +1434,68 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { } } assertEquals(9, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testMergeFlowNumSegmentsDocs() throws IOException { + List dimList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, -1L, -1L, -1L); + List docsWithField = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + List dimList2 = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, -1L, -1L, -1L); + List docsWithField2 = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, -1L, -1L, -1L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + List dimList3 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField3 = List.of(0, 1, 2, 3, 4); + List dimList4 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField4 = List.of(0, 1, 2, 3, 4); + + List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); + List metricsWithField2 = List.of(0, 1, 2, 3, 4); + + StarTreeField sf = getStarTreeField(MetricStat.COUNT); + StarTreeValues starTreeValues = getStarTreeValues( + getSortedNumericMock(dimList, docsWithField), + getSortedNumericMock(dimList2, docsWithField2), + getSortedNumericMock(metricsList, metricsWithField), + sf, + "6" + ); + + StarTreeValues starTreeValues2 = getStarTreeValues( + getSortedNumericMock(dimList3, docsWithField3), + getSortedNumericMock(dimList4, docsWithField4), + getSortedNumericMock(metricsList2, metricsWithField2), + sf, + "4" + ); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [0] + [1, 1] | [1] + [2, 2] | [2] + [3, 3] | [3] + [4, 4] | [4] + [5, 5] | [10] + [6, 6] | [6] + [7, 7] | [7] + [8, 8] | [8] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (Objects.equals(starTreeDocument.dimensions[0], 5L)) { + assertEquals(starTreeDocument.dimensions[0] * 2, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } + } + assertEquals(9, count); } public void testMergeFlowWithMissingDocs() throws IOException { @@ -1396,7 +1531,7 @@ public void testMergeFlowWithMissingDocs() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ 
metric] ] @@ -1421,6 +1556,138 @@ public void testMergeFlowWithMissingDocs() throws IOException { assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } assertEquals(10, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testMergeFlowWithMissingDocsWithZero() throws IOException { + List dimList = List.of(0L, 0L, 0L, 0L); + List docsWithField = List.of(0, 1, 2, 6); + List dimList2 = List.of(0L, 0L, 0L, 0L); + List docsWithField2 = List.of(0, 1, 2, 6); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6); + + List dimList3 = List.of(5L, 6L, 8L, -1L); + List docsWithField3 = List.of(0, 1, 3, 4); + List dimList4 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField4 = List.of(0, 1, 2, 3, 4); + + List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); + List metricsWithField2 = List.of(0, 1, 2, 3, 4); + + StarTreeField sf = getStarTreeField(MetricStat.COUNT); + StarTreeValues starTreeValues = getStarTreeValues( + getSortedNumericMock(dimList, docsWithField), + getSortedNumericMock(dimList2, docsWithField2), + getSortedNumericMock(metricsList, metricsWithField), + sf, + "7" + ); + + StarTreeValues starTreeValues2 = getStarTreeValues( + getSortedNumericMock(dimList3, docsWithField3), + getSortedNumericMock(dimList4, docsWithField4), + getSortedNumericMock(metricsList2, metricsWithField2), + sf, + "4" + ); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [9] + [5, 5] | [5] + [6, 6] | [6] + [8, 8] | [8] + [null, 7] | [7] + [null, null] | [12] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { + assertEquals(12L, (long) starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == null) { + assertEquals(7L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == 0) { + assertEquals(9L, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } + } + assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + } + + public void testMergeFlowWithMissingDocsWithZeroComplexCase() throws IOException { + List dimList = List.of(0L, 0L, 0L, 0L, 0L); + List docsWithField = List.of(0, 1, 2, 6, 8); + List dimList2 = List.of(0L, 0L, 0L, 0L); + List docsWithField2 = List.of(0, 1, 2, 6); + + List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L); + List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + + List dimList3 = List.of(5L, 6L, 8L, -1L); + List docsWithField3 = List.of(0, 1, 3, 4); + List dimList4 = List.of(5L, 6L, 7L, 8L, -1L); + List docsWithField4 = List.of(0, 1, 2, 3, 4); + + List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); + List metricsWithField2 = List.of(0, 1, 2, 3, 4); + + StarTreeField sf = getStarTreeField(MetricStat.COUNT); + StarTreeValues starTreeValues = getStarTreeValues( + getSortedNumericMock(dimList, docsWithField), + getSortedNumericMock(dimList2, docsWithField2), + getSortedNumericMock(metricsList, 
metricsWithField), + sf, + "9" + ); + + StarTreeValues starTreeValues2 = getStarTreeValues( + getSortedNumericMock(dimList3, docsWithField3), + getSortedNumericMock(dimList4, docsWithField4), + getSortedNumericMock(metricsList2, metricsWithField2), + sf, + "4" + ); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); + /** + * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] + [0, 0] | [9] + [0, null] | [8] + [5, 5] | [5] + [6, 6] | [6] + [8, 8] | [8] + [null, 7] | [7] + [null, null] | [19] + */ + int count = 0; + while (starTreeDocumentIterator.hasNext()) { + count++; + StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { + assertEquals(19L, (long) starTreeDocument.metrics[0]); + assertEquals(7, count); + } else if (starTreeDocument.dimensions[0] == null) { + assertEquals(7L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[1] == null) { + assertEquals(8L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == 0) { + assertEquals(9L, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } + } + assertEquals(7, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { @@ -1456,7 +1723,7 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1482,6 +1749,8 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { } } assertEquals(10, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { @@ -1517,7 +1786,7 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); + builder = getStarTreeBuilder(sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1542,6 +1811,8 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } assertEquals(10, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { @@ -1569,7 +1840,7 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { sf, "0" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(0), mapperService); + builder = getStarTreeBuilder(sf, getWriteState(0), mapperService); Iterator starTreeDocumentIterator = 
builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1590,6 +1861,8 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } assertEquals(6, count); + builder.build(starTreeDocumentIterator); + validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); } public void testMergeFlowWithDuplicateDimensionValues() throws IOException { @@ -1664,8 +1937,8 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); int count = 0; @@ -1693,7 +1966,7 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { count++; } assertEquals(401, count); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } public void testMergeFlowWithMaxLeafDocs() throws IOException { @@ -1774,8 +2047,8 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); /** 635 docs get generated @@ -1790,7 +2063,7 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { [null, null, null, null] | [2495000.0] */ assertEquals(635, starTreeDocuments.size()); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } private StarTreeValues getStarTreeValues( @@ -1892,11 +2165,11 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } public static long getLongFromDouble(double value) { @@ -1991,8 +2264,8 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + builder = getStarTreeBuilder(sf, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); List starTreeDocuments = builder.getStarTreeDocuments(); Map> dimValueToDocIdMap = new HashMap<>(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); @@ -2007,7 
+2280,7 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc } } assertEquals(1041, starTreeDocuments.size()); - builder.close(); + validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); } private static StarTreeField getStarTreeField(int maxLeafDocs) { @@ -2151,7 +2424,7 @@ public void testMergeFlow() throws IOException { getAttributes(1000) ); - BaseStarTreeBuilder builder = getStarTreeBuilder(sf, writeState, mapperService); + builder = getStarTreeBuilder(sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** [0, 0, 0, 0] | [0.0] @@ -2163,11 +2436,183 @@ public void testMergeFlow() throws IOException { ... [999, 999, 999, 999] | [19980.0] */ - while (starTreeDocumentIterator.hasNext()) { - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { assertEquals(starTreeDocument.dimensions[0] * 20.0, starTreeDocument.metrics[0]); } - builder.close(); + builder.build(starTreeDocumentIterator); + + // Validate the star tree structure + validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); + } + + private void validateStarTree(TreeNode root, int totalDimensions, int maxLeafDocuments, List starTreeDocuments) { + Queue queue = new LinkedList<>(); + queue.offer(new Object[] { root, false }); + while (!queue.isEmpty()) { + Object[] current = queue.poll(); + TreeNode node = (TreeNode) current[0]; + boolean currentIsStarNode = (boolean) current[1]; + + assertNotNull(node); + + // assert dimensions + if (node.dimensionId != TreeNode.ALL) { + assertTrue(node.dimensionId >= 0 && node.dimensionId < totalDimensions); + } + if (node.children != null && !node.children.isEmpty()) { + assertEquals(node.dimensionId + 1, node.childDimensionId); + assertTrue(node.childDimensionId < totalDimensions); + TreeNode starNode = null; + Object[] nonStarNodeCumulativeMetrics = getMetrics(starTreeDocuments); + for (Map.Entry entry : node.children.entrySet()) { + Long childDimensionValue = entry.getKey(); + TreeNode child = entry.getValue(); + Object[] currMetrics = getMetrics(starTreeDocuments); + if (!child.isStarNode) { + // Validate dimension values in documents + for (int i = child.startDocId; i < child.endDocId; i++) { + StarTreeDocument doc = starTreeDocuments.get(i); + int j = 0; + addMetrics(doc, currMetrics, j); + if (!child.isStarNode) { + Long dimension = doc.dimensions[child.dimensionId]; + assertEquals(childDimensionValue, dimension); + if (dimension != null) { + assertEquals(child.dimensionValue, (long) dimension); + } else { + // TODO : fix this ? 
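+                                    // a null dimension value has no concrete dimensionValue, so the node is expected to carry the TreeNode.ALL sentinel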
+ assertEquals(child.dimensionValue, TreeNode.ALL); + } + } + } + Object[] aggregatedMetrics = starTreeDocuments.get(child.aggregatedDocId).metrics; + int j = 0; + for (Object metric : currMetrics) { + /* + * TODO : refactor this to handle any data type + */ + if (metric instanceof Double) { + nonStarNodeCumulativeMetrics[j] = (double) nonStarNodeCumulativeMetrics[j] + (double) metric; + assertEquals((Double) metric, (Double) aggregatedMetrics[j], 0); + } else if (metric instanceof Long) { + nonStarNodeCumulativeMetrics[j] = (long) nonStarNodeCumulativeMetrics[j] + (long) metric; + assertEquals((long) metric, (long) aggregatedMetrics[j]); + } else if (metric instanceof Float) { + nonStarNodeCumulativeMetrics[j] = (float) nonStarNodeCumulativeMetrics[j] + (float) metric; + assertEquals((float) metric, (float) aggregatedMetrics[j], 0); + } + j++; + } + queue.offer(new Object[] { child, false }); + } else { + starNode = child; + } + } + // Add star node to queue + if (starNode != null) { + Object[] starNodeMetrics = getMetrics(starTreeDocuments); + for (int i = starNode.startDocId; i < starNode.endDocId; i++) { + StarTreeDocument doc = starTreeDocuments.get(i); + int j = 0; + addMetrics(doc, starNodeMetrics, j); + } + int j = 0; + Object[] aggregatedMetrics = starTreeDocuments.get(starNode.aggregatedDocId).metrics; + for (Object nonStarNodeCumulativeMetric : nonStarNodeCumulativeMetrics) { + assertEquals(nonStarNodeCumulativeMetric, starNodeMetrics[j]); + assertEquals(starNodeMetrics[j], aggregatedMetrics[j]); + /* + * TODO : refactor this to handle any data type + */ + if (nonStarNodeCumulativeMetric instanceof Double) { + assertEquals((double) nonStarNodeCumulativeMetric, (double) starNodeMetrics[j], 0); + assertEquals((double) nonStarNodeCumulativeMetric, (double) aggregatedMetrics[j], 0); + } else if (nonStarNodeCumulativeMetric instanceof Long) { + assertEquals((long) nonStarNodeCumulativeMetric, (long) starNodeMetrics[j]); + assertEquals((long) nonStarNodeCumulativeMetric, (long) aggregatedMetrics[j]); + } else if (nonStarNodeCumulativeMetric instanceof Float) { + assertEquals((float) nonStarNodeCumulativeMetric, (float) starNodeMetrics[j], 0); + assertEquals((float) nonStarNodeCumulativeMetric, (float) aggregatedMetrics[j], 0); + } + + j++; + } + assertEquals(-1L, starNode.dimensionValue); + queue.offer(new Object[] { starNode, true }); + } + } else { + assertTrue(node.endDocId - node.startDocId <= maxLeafDocuments); + } + + if (currentIsStarNode) { + StarTreeDocument prevDoc = null; + int docCount = 0; + int docId = node.startDocId; + int dimensionId = node.dimensionId; + + while (docId < node.endDocId) { + StarTreeDocument currentDoc = starTreeDocuments.get(docId); + docCount++; + + // Verify that the dimension at 'dimensionId' is set to STAR_IN_DOC_VALUES_INDEX + assertNull(currentDoc.dimensions[dimensionId]); + + // Verify sorting of documents + if (prevDoc != null) { + assertTrue(compareDocuments(prevDoc, currentDoc, dimensionId + 1, totalDimensions) <= 0); + } + prevDoc = currentDoc; + docId++; + } + + // Verify that the number of generated star documents matches the range in the star node + assertEquals(node.endDocId - node.startDocId, docCount); + } + } + } + + /** + * TODO : refactor this to handle any data type + */ + private static void addMetrics(StarTreeDocument doc, Object[] currMetrics, int j) { + for (Object metric : doc.metrics) { + if (metric instanceof Double) { + currMetrics[j] = (double) currMetrics[j] + (double) metric; + } else if (metric instanceof Long) { + 
currMetrics[j] = (long) currMetrics[j] + (long) metric; + } else if (metric instanceof Float) { + currMetrics[j] = (float) currMetrics[j] + (float) metric; + } + j++; + } + } + + private static Object[] getMetrics(List starTreeDocuments) { + Object[] nonStarNodeCumulativeMetrics = new Object[starTreeDocuments.get(0).metrics.length]; + for (int i = 0; i < nonStarNodeCumulativeMetrics.length; i++) { + if (starTreeDocuments.get(0).metrics[i] instanceof Long) { + nonStarNodeCumulativeMetrics[i] = 0L; + } else if (starTreeDocuments.get(0).metrics[i] instanceof Double) { + nonStarNodeCumulativeMetrics[i] = 0.0; + } else if (starTreeDocuments.get(0).metrics[i] instanceof Float) { + nonStarNodeCumulativeMetrics[i] = 0.0f; + } + } + return nonStarNodeCumulativeMetrics; + } + + private int compareDocuments(StarTreeDocument doc1, StarTreeDocument doc2, int startDim, int endDim) { + for (int i = startDim; i < endDim; i++) { + Long val1 = doc1.dimensions[i]; + Long val2 = doc2.dimensions[i]; + + if (!Objects.equals(val1, val2)) { + if (val1 == null) return 1; + if (val2 == null) return -1; + return Long.compare(val1, val2); + } + } + return 0; } Map getAttributes(int numSegmentDocs) { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java new file mode 100644 index 0000000000000..92382b78f60c6 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.mapper.MapperService; + +import java.io.IOException; + +public class OffHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { + @Override + public BaseStarTreeBuilder getStarTreeBuilder( + StarTreeField starTreeField, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + return new OffHeapStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java index 564ab110fa7a5..828bddfb8aa6e 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java @@ -97,18 +97,10 @@ public void test_buildWithNoStarTreeFields() throws IOException { public void test_getStarTreeBuilder() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - StarTreeBuilder starTreeBuilder = starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService); + StarTreeBuilder starTreeBuilder = starTreesBuilder.getStarTreeBuilder(starTreeField, segmentWriteState, mapperService); assertTrue(starTreeBuilder instanceof OnHeapStarTreeBuilder); } - public void test_getStarTreeBuilder_illegalArgument() { - when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); - StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(1, new HashSet<>(), StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP); - StarTreeField starTreeField = new StarTreeField("star_tree", new ArrayList<>(), new ArrayList<>(), starTreeFieldConfiguration); - StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - assertThrows(IllegalArgumentException.class, () -> starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService)); - } - public void test_closeWithNoStarTreeFields() throws IOException { StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( 1, diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java index dfc83125b2806..f56f7d9906ae1 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIteratorTests.java @@ -127,7 +127,5 @@ public void test_multipleCoordinatedDocumentReader() throws IOException { assertNotEquals(0, sequentialDocValuesIterator2.getDocId()); assertEquals(1, sequentialDocValuesIterator2.getDocId()); assertEquals(9L, (long) sequentialDocValuesIterator2.value(1)); - } - } diff --git 
a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java new file mode 100644 index 0000000000000..7d1bd37246fae --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentBitSetUtilTests.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.function.Function; + +/** + * Unit tests for {@link StarTreeDocumentBitSetUtil} + */ +public class StarTreeDocumentBitSetUtilTests extends OpenSearchTestCase { + + public void testWriteAndReadNullBitSets() throws IOException { + for (int k = 0; k < 10; k++) { + int randomArraySize = randomIntBetween(2, 256); + Long[] dims = new Long[randomArraySize]; + for (int i = 0; i < randomArraySize; i++) { + dims[i] = randomLong(); + } + testNullBasedOnBitset(dims); + } + } + + void testNullBasedOnBitset(Long[] dims) throws IOException { + Long[] dims1 = Arrays.copyOf(dims, dims.length); + int randomNullIndex1 = randomIntBetween(0, dims.length - 1); + int randomNullIndex2 = randomIntBetween(0, dims.length - 1); + dims[randomNullIndex1] = null; + dims[randomNullIndex2] = null; + Path basePath = createTempDir("OffHeapTests"); + FSDirectory fsDirectory = FSDirectory.open(basePath); + String TEST_FILE = "test_file"; + IndexOutput indexOutput = fsDirectory.createOutput(TEST_FILE, IOContext.DEFAULT); + StarTreeDocumentBitSetUtil.writeBitSet(dims, indexOutput); + indexOutput.close(); + + // test null value on read + IndexInput in = fsDirectory.openInput(TEST_FILE, IOContext.DEFAULT); + RandomAccessInput randomAccessInput = in.randomAccessSlice(0, in.length()); + Function identityValueSupplier = i -> null; + StarTreeDocumentBitSetUtil.readBitSet(randomAccessInput, 0, dims1, identityValueSupplier); + assertNull(dims1[randomNullIndex1]); + assertNull(dims1[randomNullIndex2]); + in.close(); + + // test identity value on read + long randomLong = randomLong(); + identityValueSupplier = i -> randomLong; + in = fsDirectory.openInput(TEST_FILE, IOContext.DEFAULT); + + randomAccessInput = in.randomAccessSlice(0, in.length()); + StarTreeDocumentBitSetUtil.readBitSet(randomAccessInput, 0, dims1, identityValueSupplier); + assertEquals(randomLong, (long) dims1[randomNullIndex1]); + assertEquals(randomLong, (long) dims1[randomNullIndex2]); + in.close(); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java new file mode 100644 index 0000000000000..b485ea1a4fe3e --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java @@ -0,0 +1,201 @@ +/* + * 
SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.opensearch.common.Randomness; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Random; + +/** + * Tests for {@link StarTreeDocumentsSorter}. + */ +public class StarTreeDocumentsSorterTests extends OpenSearchTestCase { + private Map testData; + + @Before + public void setUp() throws Exception { + super.setUp(); + testData = new HashMap<>(); + testData.put(0, new Long[] { -1L, 2L, 3L }); + testData.put(1, new Long[] { 1L, 2L, 2L }); + testData.put(2, new Long[] { -1L, -1L, 3L }); + testData.put(3, new Long[] { 1L, 2L, null }); + testData.put(4, new Long[] { 1L, null, 3L }); + } + + public void testSortDocumentsOffHeap_FirstDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = -1; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 2, 0, 1, 3, 4 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_ThirdDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = 1; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 1, 0, 2, 4, 3 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_SingleElement() { + int[] sortedDocIds = { 0 }; + int dimensionId = -1; + int numDocs = 1; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 0 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_EmptyArray() { + int[] sortedDocIds = {}; + int dimensionId = -1; + int numDocs = 0; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] {}, sortedDocIds); + } + + public void testSortDocumentsOffHeap_SecondDimensionId() { + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int dimensionId = 0; + int numDocs = 5; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + assertArrayEquals(new int[] { 2, 1, 0, 3, 4 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_AllNulls() { + Map testData = new HashMap<>(); + testData.put(0, new Long[] { null, null, null }); + testData.put(1, new Long[] { null, null, null }); + testData.put(2, new Long[] { null, null, null }); + + int[] sortedDocIds = { 0, 1, 2 }; + int dimensionId = -1; + int numDocs = 3; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + + // The order should remain unchanged as all elements are equal (null) + assertArrayEquals(new int[] { 0, 1, 2 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_Negatives() { + Map testData = new HashMap<>(); + testData.put(0, new Long[] { -10L, 0L }); + testData.put(1, new Long[] { -9L, 0L }); + testData.put(2, new Long[] { -8L, 0L }); + testData.put(3, new Long[] { -7L, -0L }); + testData.put(4, new Long[] { -15L, -0L }); + + int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int 
dimensionId = -1;
+        int numDocs = 5;
+
+        StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]));
+
+        // sorted ascending by the first dimension: -15 < -10 < -9 < -8 < -7
+        assertArrayEquals(new int[] { 4, 0, 1, 2, 3 }, sortedDocIds);
+    }
+
+    public void testRandomSort() {
+        int i = 0;
+        while (i < 10) {
+            testRandomizedSort();
+            i++;
+        }
+    }
+
+    private void testRandomizedSort() {
+
+        int numDocs = randomIntBetween(0, 1000);
+        Random random = Randomness.get();
+        // skew more towards realistic number of dimensions
+        int numDimensions = random.nextBoolean() ? randomIntBetween(2, 10) : randomIntBetween(2, 100);
+        List<Long[]> testData = new ArrayList<>();
+        // Generate random test data
+        for (int i = 0; i < numDocs; i++) {
+            Long[] dimensions = new Long[numDimensions];
+            for (int j = 0; j < numDimensions; j++) {
+                if (random.nextFloat() < 0.5) {
+                    dimensions[j] = random.nextBoolean() ? Long.valueOf(0L) : random.nextBoolean() ? -1L : null;
+                } else {
+                    dimensions[j] = random.nextLong();
+                }
+            }
+            testData.add(dimensions);
+        }
+
+        int[] sortedDocIds = new int[numDocs];
+        for (int i = 0; i < numDocs; i++) {
+            sortedDocIds[i] = i;
+        }
+        // sort dimensionId + 1 to numDimensions
+        // for example to start from dimension in 0th index, we need to pass -1 to sort method
+        int dimensionId = random.nextInt(numDimensions) - 1;
+
+        // Sort using StarTreeDocumentsSorter
+        StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]));
+
+        // Verify the sorting
+        for (int i = 1; i < numDocs; i++) {
+            Long[] prev = testData.get(sortedDocIds[i - 1]);
+            Long[] curr = testData.get(sortedDocIds[i]);
+            boolean isCorrectOrder = true;
+            for (int j = dimensionId + 1; j < numDimensions; j++) {
+                int comparison = compareLongs(prev[j], curr[j]);
+                if (comparison < 0) {
+                    break;
+                } else if (comparison > 0) {
+                    isCorrectOrder = false;
+                    break;
+                }
+            }
+            assertTrue(
+                "Sorting error when sorting from dimension index "
+                    + dimensionId
+                    + " Prev : "
+                    + Arrays.toString(prev)
+                    + " :: Curr : "
+                    + Arrays.toString(curr),
+                isCorrectOrder
+            );
+        }
+    }
+
+    private int compareLongs(Long a, Long b) {
+        if (!Objects.equals(a, b)) {
+            if (a == null) {
+                return 1;
+            } else if (b == null) {
+                return -1;
+            } else {
+                return a.compareTo(b);
+            }
+        }
+        return 0;
+    }
+}
diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
index 3144b1b007924..132d2ff5a566a 100644
--- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
@@ -69,7 +69,7 @@ public void testValidStarTree() throws IOException {
             List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.SUM, MetricStat.AVG);
             assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
             assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs());
-            assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode());
+            assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode());
             assertEquals(
                 new HashSet<>(Arrays.asList("@timestamp", "status")),
                 starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()
@@ -101,7 +101,7 @@ public void testValidStarTreeDefaults() throws IOException {
             );
             assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); - assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); + assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); } } From 97c1bf01ff511c4db74dc8a81045447b009bec29 Mon Sep 17 00:00:00 2001 From: Kiran Prakash Date: Wed, 7 Aug 2024 09:39:07 -0700 Subject: [PATCH 09/10] QueryGroup Resource Tracking framework and implementation (#13897) * initial code for the sandbox resource tracking and cancellation framework Signed-off-by: Kiran Prakash * Fix Failing Tests Signed-off-by: Kiran Prakash * spotless Apply Signed-off-by: Kiran Prakash * Update SandboxService.java Signed-off-by: Kiran Prakash * Update SandboxService.java Signed-off-by: Kiran Prakash * Update SandboxTask.java Signed-off-by: Kiran Prakash * Add java docs Signed-off-by: Kiran Prakash * spotless Signed-off-by: Kiran Prakash * javadocs Signed-off-by: Kiran Prakash * javadocs Signed-off-by: Kiran Prakash * java docs Signed-off-by: Kiran Prakash * Update AbstractTaskCancellation.java Signed-off-by: Kiran Prakash * Update SandboxModule.java Signed-off-by: Kiran Prakash * Some tests and stubs Signed-off-by: Kiran Prakash * spotless Signed-off-by: Kiran Prakash * :server:testingConventions Signed-off-by: Kiran Prakash * Update AbstractTaskCancellation.java Signed-off-by: Kiran Prakash * more tests Signed-off-by: Kiran Prakash * addressing comments Signed-off-by: Kiran Prakash * revert some accidentally pushed files Signed-off-by: Kiran Prakash * resolve flakiness Signed-off-by: Kiran Prakash * renaming sandbox to querygroup and adjusting code based on merged PRs Signed-off-by: Kiran Prakash * jvm to memory Signed-off-by: Kiran Prakash * missing java docs Signed-off-by: Kiran Prakash * spotless Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * pluck cancellation changes out of this PR Signed-off-by: Kiran Prakash * remove unused Signed-off-by: Kiran Prakash * remove cancellation related code and add more tests coverage Signed-off-by: Kiran Prakash * us only memory and not jvm Signed-off-by: Kiran Prakash * test conventions Signed-off-by: Kiran Prakash * Bring back enum Signed-off-by: Kiran Prakash * Update SearchBackpressureService.java Signed-off-by: Kiran Prakash * revert changes Signed-off-by: Kiran Prakash * revert changes Signed-off-by: Kiran Prakash * all required changes Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * cleanups Signed-off-by: Kiran Prakash * Delete QueryGroupService.java Signed-off-by: Kiran Prakash * cleanups Signed-off-by: Kiran Prakash * Update QueryGroupLevelResourceUsageViewTests.java Signed-off-by: Kiran Prakash * Update QueryGroupLevelResourceUsageViewTests.java Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerService.java Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerService.java Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerService.java Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * rebasing with latest main Signed-off-by: Kiran Prakash * remove experimental Signed-off-by: Kiran Prakash * remove queryGroupId Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerService.java Signed-off-by: Kiran Prakash * change code 
comments Signed-off-by: Kiran Prakash * remmove QueryGroupUsageTracker Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerService.java Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerService.java Signed-off-by: Kiran Prakash * remove QueryGroupTestHelpers Signed-off-by: Kiran Prakash * cleanups Signed-off-by: Kiran Prakash * remove queryGroupHelper Signed-off-by: Kiran Prakash * Update ResourceTypeTests.java Signed-off-by: Kiran Prakash * extend OpenSearchTestCase Signed-off-by: Kiran Prakash * pr comments Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * Update QueryGroupResourceUsageTrackerServiceTests.java Signed-off-by: Kiran Prakash * Update ResourceTypeTests.java Signed-off-by: Kiran Prakash * Update ResourceTypeTests.java Signed-off-by: Kiran Prakash * Update ResourceType.java Signed-off-by: Kiran Prakash * Update ResourceType.java Signed-off-by: Kiran Prakash --------- Signed-off-by: Kiran Prakash --- CHANGELOG.md | 1 + .../opensearch/cluster/metadata/Metadata.java | 2 +- .../org/opensearch/search/ResourceType.java | 21 ++- .../wlm/QueryGroupLevelResourceUsageView.java | 50 +++++++ ...QueryGroupResourceUsageTrackerService.java | 84 ++++++++++++ .../opensearch/wlm/tracker/package-info.java | 12 ++ .../opensearch/search/ResourceTypeTests.java | 52 ++++++++ ...QueryGroupLevelResourceUsageViewTests.java | 64 +++++++++ ...GroupResourceUsageTrackerServiceTests.java | 126 ++++++++++++++++++ 9 files changed, 408 insertions(+), 4 deletions(-) create mode 100644 server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java create mode 100644 server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java create mode 100644 server/src/main/java/org/opensearch/wlm/tracker/package-info.java create mode 100644 server/src/test/java/org/opensearch/search/ResourceTypeTests.java create mode 100644 server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java create mode 100644 server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index be5e5598b09c2..3e83a5bf9b4cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) - [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) - Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) +- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) ### Dependencies - Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 440b9e267cf0a..09bef2ddf9ee6 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -1391,7 +1391,7 @@ public Builder put(final QueryGroup queryGroup) { return queryGroups(existing); } - private Map getQueryGroups() { + public 
Map<String, QueryGroup> getQueryGroups() {
         return Optional.ofNullable(this.customs.get(QueryGroupMetadata.TYPE))
             .map(o -> (QueryGroupMetadata) o)
             .map(QueryGroupMetadata::queryGroups)
diff --git a/server/src/main/java/org/opensearch/search/ResourceType.java b/server/src/main/java/org/opensearch/search/ResourceType.java
index fe5ce4dd2bb50..0cba2222a6e20 100644
--- a/server/src/main/java/org/opensearch/search/ResourceType.java
+++ b/server/src/main/java/org/opensearch/search/ResourceType.java
@@ -10,21 +10,26 @@
 import org.opensearch.common.annotation.PublicApi;
 import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.tasks.resourcetracker.ResourceStats;
+import org.opensearch.tasks.Task;
 
 import java.io.IOException;
+import java.util.function.Function;
 
 /**
  * Enum to hold the resource type
  */
 @PublicApi(since = "2.x")
 public enum ResourceType {
-    CPU("cpu"),
-    MEMORY("memory");
+    CPU("cpu", task -> task.getTotalResourceUtilization(ResourceStats.CPU)),
+    MEMORY("memory", task -> task.getTotalResourceUtilization(ResourceStats.MEMORY));
 
     private final String name;
+    private final Function<Task, Long> getResourceUsage;
 
-    ResourceType(String name) {
+    ResourceType(String name, Function<Task, Long> getResourceUsage) {
         this.name = name;
+        this.getResourceUsage = getResourceUsage;
     }
 
     /**
@@ -48,4 +53,14 @@ public static void writeTo(StreamOutput out, ResourceType resourceType) throws IOException {
     public String getName() {
         return name;
     }
+
+    /**
+     * Gets the resource usage for a given resource type and task.
+     *
+     * @param task the task for which to calculate resource usage
+     * @return the resource usage
+     */
+    public long getResourceUsage(Task task) {
+        return getResourceUsage.apply(task);
+    }
 }
diff --git a/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java b/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java
new file mode 100644
index 0000000000000..2fd743dc3f83f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.wlm;
+
+import org.opensearch.search.ResourceType;
+import org.opensearch.tasks.Task;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Represents the point in time view of resource usage of a QueryGroup and
+ * has a 1:1 relation with a QueryGroup.
+ * This class holds the resource usage data and the list of active tasks.
+ */
+public class QueryGroupLevelResourceUsageView {
+    // resourceUsage holds the resource usage data for a QueryGroup at a point in time
+    private final Map<ResourceType, Long> resourceUsage;
+    // activeTasks holds the list of active tasks for a QueryGroup at a point in time
+    private final List<Task> activeTasks;
+
+    public QueryGroupLevelResourceUsageView(Map<ResourceType, Long> resourceUsage, List<Task> activeTasks) {
+        this.resourceUsage = resourceUsage;
+        this.activeTasks = activeTasks;
+    }
+
+    /**
+     * Returns the resource usage data.
+     *
+     * @return The map of resource usage data
+     */
+    public Map<ResourceType, Long> getResourceUsageData() {
+        return resourceUsage;
+    }
+
+    /**
+     * Returns the list of active tasks.
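+     * The list is a point-in-time snapshot of the tasks that were active when the view was constructed.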
+     *
+     * @return The list of active tasks
+     */
+    public List<Task> getActiveTasks() {
+        return activeTasks;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java b/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java
new file mode 100644
index 0000000000000..bfbf5d8a452d1
--- /dev/null
+++ b/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.wlm.tracker;
+
+import org.opensearch.search.ResourceType;
+import org.opensearch.tasks.Task;
+import org.opensearch.tasks.TaskResourceTrackingService;
+import org.opensearch.wlm.QueryGroupLevelResourceUsageView;
+import org.opensearch.wlm.QueryGroupTask;
+
+import java.util.EnumMap;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * This class tracks resource usage per QueryGroup
+ */
+public class QueryGroupResourceUsageTrackerService {
+
+    public static final EnumSet<ResourceType> TRACKED_RESOURCES = EnumSet.allOf(ResourceType.class);
+    private final TaskResourceTrackingService taskResourceTrackingService;
+
+    /**
+     * QueryGroupResourceUsageTrackerService constructor
+     *
+     * @param taskResourceTrackingService Service that helps track resource usage of tasks running on a node.
+     */
+    public QueryGroupResourceUsageTrackerService(TaskResourceTrackingService taskResourceTrackingService) {
+        this.taskResourceTrackingService = taskResourceTrackingService;
+    }
+
+    /**
+     * Constructs a map of QueryGroupLevelResourceUsageView instances for each QueryGroup.
+     *
+     * @return Map of QueryGroup views
+     */
+    public Map<String, QueryGroupLevelResourceUsageView> constructQueryGroupLevelUsageViews() {
+        final Map<String, List<Task>> tasksByQueryGroup = getTasksGroupedByQueryGroup();
+        final Map<String, QueryGroupLevelResourceUsageView> queryGroupViews = new HashMap<>();
+
+        // Iterate over each QueryGroup entry
+        for (Map.Entry<String, List<Task>> queryGroupEntry : tasksByQueryGroup.entrySet()) {
+            // Compute the QueryGroup usage
+            final EnumMap<ResourceType, Long> queryGroupUsage = new EnumMap<>(ResourceType.class);
+            for (ResourceType resourceType : TRACKED_RESOURCES) {
+                long queryGroupResourceUsage = 0;
+                for (Task task : queryGroupEntry.getValue()) {
+                    queryGroupResourceUsage += resourceType.getResourceUsage(task);
+                }
+                queryGroupUsage.put(resourceType, queryGroupResourceUsage);
+            }
+
+            // Add to the QueryGroup View
+            queryGroupViews.put(
+                queryGroupEntry.getKey(),
+                new QueryGroupLevelResourceUsageView(queryGroupUsage, queryGroupEntry.getValue())
+            );
+        }
+        return queryGroupViews;
+    }
+
+    /**
+     * Groups tasks by their associated QueryGroup.
+     *
+     * @return Map of tasks grouped by QueryGroup
+     */
+    private Map<String, List<Task>> getTasksGroupedByQueryGroup() {
+        return taskResourceTrackingService.getResourceAwareTasks()
+            .values()
+            .stream()
+            .filter(QueryGroupTask.class::isInstance)
+            .map(QueryGroupTask.class::cast)
+            .collect(Collectors.groupingBy(QueryGroupTask::getQueryGroupId, Collectors.mapping(task -> (Task) task, Collectors.toList())));
+    }
+}
diff --git a/server/src/main/java/org/opensearch/wlm/tracker/package-info.java b/server/src/main/java/org/opensearch/wlm/tracker/package-info.java
new file mode 100644
index 0000000000000..86efc99355d3d
--- /dev/null
+++ b/server/src/main/java/org/opensearch/wlm/tracker/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * QueryGroup resource tracking artifacts
+ */
+package org.opensearch.wlm.tracker;
diff --git a/server/src/test/java/org/opensearch/search/ResourceTypeTests.java b/server/src/test/java/org/opensearch/search/ResourceTypeTests.java
new file mode 100644
index 0000000000000..78827b8b1bdad
--- /dev/null
+++ b/server/src/test/java/org/opensearch/search/ResourceTypeTests.java
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search;
+
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.core.tasks.resourcetracker.ResourceStats;
+import org.opensearch.tasks.CancellableTask;
+import org.opensearch.test.OpenSearchTestCase;
+
+import static org.junit.Assert.assertThrows;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class ResourceTypeTests extends OpenSearchTestCase {
+
+    public void testFromName() {
+        assertSame(ResourceType.CPU, ResourceType.fromName("cpu"));
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("CPU"); });
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("Cpu"); });
+
+        assertSame(ResourceType.MEMORY, ResourceType.fromName("memory"));
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("Memory"); });
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("MEMORY"); });
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("JVM"); });
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("Heap"); });
+        assertThrows(IllegalArgumentException.class, () -> { ResourceType.fromName("Disk"); });
+    }
+
+    public void testGetName() {
+        assertEquals("cpu", ResourceType.CPU.getName());
+        assertEquals("memory", ResourceType.MEMORY.getName());
+    }
+
+    public void testGetResourceUsage() {
+        SearchShardTask mockTask = createMockTask(SearchShardTask.class, 100, 200);
+        assertEquals(100, ResourceType.CPU.getResourceUsage(mockTask));
+        assertEquals(200, ResourceType.MEMORY.getResourceUsage(mockTask));
+    }
+
+    private <T extends CancellableTask> T createMockTask(Class<T> type, long cpuUsage, long heapUsage) {
+        T task = mock(type);
+        when(task.getTotalResourceUtilization(ResourceStats.CPU)).thenReturn(cpuUsage);
+        when(task.getTotalResourceUtilization(ResourceStats.MEMORY)).thenReturn(heapUsage);
+        return task;
+    }
+}
diff --git
a/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java b/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java new file mode 100644 index 0000000000000..7f6419505fec2 --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm; + +import org.opensearch.action.search.SearchAction; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.search.ResourceType; +import org.opensearch.tasks.Task; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class QueryGroupLevelResourceUsageViewTests extends OpenSearchTestCase { + Map resourceUsage; + List activeTasks; + + public void setUp() throws Exception { + super.setUp(); + resourceUsage = Map.of(ResourceType.fromName("memory"), 34L, ResourceType.fromName("cpu"), 12L); + activeTasks = List.of(getRandomTask(4321)); + } + + public void testGetResourceUsageData() { + QueryGroupLevelResourceUsageView queryGroupLevelResourceUsageView = new QueryGroupLevelResourceUsageView( + resourceUsage, + activeTasks + ); + Map resourceUsageData = queryGroupLevelResourceUsageView.getResourceUsageData(); + assertTrue(assertResourceUsageData(resourceUsageData)); + } + + public void testGetActiveTasks() { + QueryGroupLevelResourceUsageView queryGroupLevelResourceUsageView = new QueryGroupLevelResourceUsageView( + resourceUsage, + activeTasks + ); + List activeTasks = queryGroupLevelResourceUsageView.getActiveTasks(); + assertEquals(1, activeTasks.size()); + assertEquals(4321, activeTasks.get(0).getId()); + } + + private boolean assertResourceUsageData(Map resourceUsageData) { + return resourceUsageData.get(ResourceType.fromName("memory")) == 34L && resourceUsageData.get(ResourceType.fromName("cpu")) == 12L; + } + + private Task getRandomTask(long id) { + return new Task( + id, + "transport", + SearchAction.NAME, + "test description", + new TaskId(randomLong() + ":" + randomLong()), + Collections.emptyMap() + ); + } +} diff --git a/server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java b/server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java new file mode 100644 index 0000000000000..967119583c25f --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.wlm.tracker; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchTask; +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.search.ResourceType; +import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskResourceTrackingService; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.wlm.QueryGroupLevelResourceUsageView; +import org.opensearch.wlm.QueryGroupTask; +import org.junit.After; +import org.junit.Before; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.opensearch.wlm.QueryGroupTask.QUERY_GROUP_ID_HEADER; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class QueryGroupResourceUsageTrackerServiceTests extends OpenSearchTestCase { + TestThreadPool threadPool; + TaskResourceTrackingService mockTaskResourceTrackingService; + QueryGroupResourceUsageTrackerService queryGroupResourceUsageTrackerService; + + @Before + public void setup() { + threadPool = new TestThreadPool(getTestName()); + mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + queryGroupResourceUsageTrackerService = new QueryGroupResourceUsageTrackerService(mockTaskResourceTrackingService); + } + + @After + public void cleanup() { + ThreadPool.terminate(threadPool, 5, TimeUnit.SECONDS); + } + + public void testConstructQueryGroupLevelViews_CreatesQueryGroupLevelUsageView_WhenTasksArePresent() { + List queryGroupIds = List.of("queryGroup1", "queryGroup2", "queryGroup3"); + + Map activeSearchShardTasks = createActiveSearchShardTasks(queryGroupIds); + when(mockTaskResourceTrackingService.getResourceAwareTasks()).thenReturn(activeSearchShardTasks); + Map stringQueryGroupLevelResourceUsageViewMap = queryGroupResourceUsageTrackerService + .constructQueryGroupLevelUsageViews(); + + for (String queryGroupId : queryGroupIds) { + assertEquals( + 400, + (long) stringQueryGroupLevelResourceUsageViewMap.get(queryGroupId).getResourceUsageData().get(ResourceType.MEMORY) + ); + assertEquals(2, stringQueryGroupLevelResourceUsageViewMap.get(queryGroupId).getActiveTasks().size()); + } + } + + public void testConstructQueryGroupLevelViews_CreatesQueryGroupLevelUsageView_WhenTasksAreNotPresent() { + Map stringQueryGroupLevelResourceUsageViewMap = queryGroupResourceUsageTrackerService + .constructQueryGroupLevelUsageViews(); + assertTrue(stringQueryGroupLevelResourceUsageViewMap.isEmpty()); + } + + public void testConstructQueryGroupLevelUsageViews_WithTasksHavingDifferentResourceUsage() { + Map activeSearchShardTasks = new HashMap<>(); + activeSearchShardTasks.put(1L, createMockTask(SearchShardTask.class, 100, 200, "queryGroup1")); + activeSearchShardTasks.put(2L, createMockTask(SearchShardTask.class, 200, 400, "queryGroup1")); + when(mockTaskResourceTrackingService.getResourceAwareTasks()).thenReturn(activeSearchShardTasks); + + Map queryGroupViews = queryGroupResourceUsageTrackerService + .constructQueryGroupLevelUsageViews(); + + assertEquals(600, (long) 
queryGroupViews.get("queryGroup1").getResourceUsageData().get(ResourceType.MEMORY)); + assertEquals(2, queryGroupViews.get("queryGroup1").getActiveTasks().size()); + } + + private Map createActiveSearchShardTasks(List queryGroupIds) { + Map activeSearchShardTasks = new HashMap<>(); + long task_id = 0; + for (String queryGroupId : queryGroupIds) { + for (int i = 0; i < 2; i++) { + activeSearchShardTasks.put(++task_id, createMockTask(SearchShardTask.class, 100, 200, queryGroupId)); + } + } + return activeSearchShardTasks; + } + + private T createMockTask(Class type, long cpuUsage, long heapUsage, String queryGroupId) { + T task = mock(type); + if (task instanceof SearchTask || task instanceof SearchShardTask) { + // Stash the current thread context to ensure that any existing context is preserved and restored after setting the query group + // ID. + try (ThreadContext.StoredContext ignore = threadPool.getThreadContext().stashContext()) { + threadPool.getThreadContext().putHeader(QUERY_GROUP_ID_HEADER, queryGroupId); + ((QueryGroupTask) task).setQueryGroupId(threadPool.getThreadContext()); + } + } + when(task.getTotalResourceUtilization(ResourceStats.CPU)).thenReturn(cpuUsage); + when(task.getTotalResourceUtilization(ResourceStats.MEMORY)).thenReturn(heapUsage); + when(task.getStartTimeNanos()).thenReturn((long) 0); + + AtomicBoolean isCancelled = new AtomicBoolean(false); + doAnswer(invocation -> { + isCancelled.set(true); + return null; + }).when(task).cancel(anyString()); + doAnswer(invocation -> isCancelled.get()).when(task).isCancelled(); + + return task; + } +} From 348c04e7a32e13ea040a1d2e0459c03da9ec0c2c Mon Sep 17 00:00:00 2001 From: Jay Deng Date: Wed, 7 Aug 2024 11:40:52 -0700 Subject: [PATCH 10/10] Fix CHANGELOG for #15054 (#15150) Signed-off-by: Jay Deng --- CHANGELOG-3.0.md | 1 - CHANGELOG.md | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG-3.0.md b/CHANGELOG-3.0.md index 78e93eed0158a..48d978bede420 100644 --- a/CHANGELOG-3.0.md +++ b/CHANGELOG-3.0.md @@ -13,7 +13,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800)) - Allow to pass the list settings through environment variables (like [], ["a", "b", "c"], ...) 
([#10625](https://github.com/opensearch-project/OpenSearch/pull/10625)) - Views, simplify data access and manipulation by providing a virtual layer over one or more indices ([#11957](https://github.com/opensearch-project/OpenSearch/pull/11957)) -- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) ### Dependencies diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e83a5bf9b4cb..f44949bf38511 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) - [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) - Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) +- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) - [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) ### Dependencies
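For readers following the workload-management commits above, here is a minimal usage sketch of the new ResourceType#getResourceUsage accessor introduced in #13897. It mirrors the per-QueryGroup aggregation loop in QueryGroupResourceUsageTrackerService#constructQueryGroupLevelUsageViews, but is not part of any patch in this series: the UsageSketch class name and its tasks argument are illustrative assumptions, while ResourceType and Task are the real types from the diffs.

import java.util.EnumMap;
import java.util.List;
import java.util.Map;

import org.opensearch.search.ResourceType;
import org.opensearch.tasks.Task;

// Hypothetical helper: sums each tracked resource across a list of tasks,
// the same computation the tracker service performs for each QueryGroup bucket.
public class UsageSketch {
    public static Map<ResourceType, Long> aggregate(List<Task> tasks) {
        Map<ResourceType, Long> usage = new EnumMap<>(ResourceType.class);
        for (ResourceType type : ResourceType.values()) {
            long total = 0;
            for (Task task : tasks) {
                // each enum constant delegates to Task#getTotalResourceUtilization(ResourceStats)
                total += type.getResourceUsage(task);
            }
            usage.put(type, total);
        }
        return usage;
    }
}

For example, two tasks each reporting 200 bytes of memory would yield MEMORY -> 400, matching the per-group totals asserted in QueryGroupResourceUsageTrackerServiceTests above.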