From 1d23b88cbf2177861f3a98ab906accf39c32c766 Mon Sep 17 00:00:00 2001
From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com>
Date: Thu, 19 Oct 2023 12:28:09 +0530
Subject: [PATCH 01/26] Muting s3 request stats test (#10736)

Signed-off-by: Gaurav Bafna
---
 .../opensearch/repositories/s3/S3BlobStoreRepositoryTests.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java
index 1361f3165b653..4df30bfd2169e 100644
--- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java
+++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java
@@ -165,6 +165,7 @@ protected Settings nodeSettings(int nodeOrdinal) {
         return builder.build();
     }

+    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/10735")
     @Override
     public void testRequestStats() throws Exception {
         final String repository = createRepository(randomName());

From 3899d117722e5517c3c02709cffc981bdf677fd5 Mon Sep 17 00:00:00 2001
From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com>
Date: Thu, 19 Oct 2023 17:49:27 +0530
Subject: [PATCH 02/26] Changing version for repo stats blob post backport to 2.x (#10717)

Signed-off-by: Gaurav Bafna
---
 .../opensearch/action/admin/cluster/node/stats/NodeStats.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java
index 0c8aa027e5f01..e9bfa358103c8 100644
--- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java
+++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java
@@ -211,7 +211,7 @@ public NodeStats(StreamInput in) throws IOException {
         } else {
             resourceUsageStats = null;
         }
-        if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
+        if (in.getVersion().onOrAfter(Version.V_2_12_0)) {
             repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new);
         } else {
             repositoriesStats = null;
         }
@@ -465,7 +465,7 @@ public void writeTo(StreamOutput out) throws IOException {
         if (out.getVersion().onOrAfter(Version.V_2_12_0)) {
             out.writeOptionalWriteable(resourceUsageStats);
         }
-        if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
+        if (out.getVersion().onOrAfter(Version.V_2_12_0)) {
             out.writeOptionalWriteable(repositoriesStats);
         }
     }

From 69f6f4e30909e215b4ca7fd55fd80cf8c4e8d3a4 Mon Sep 17 00:00:00 2001
From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com>
Date: Thu, 19 Oct 2023 10:36:25 -0400
Subject: [PATCH 03/26] [AUTO] [main] Add bwc version 2.11.1.
(#10648) * Add bwc version 2.11.1 Signed-off-by: GitHub * Update Version.java Signed-off-by: Andriy Redko --------- Signed-off-by: GitHub Signed-off-by: Andriy Redko Co-authored-by: opensearch-ci-bot Co-authored-by: Andriy Redko --- .ci/bwcVersions | 1 + libs/core/src/main/java/org/opensearch/Version.java | 1 + 2 files changed, 2 insertions(+) diff --git a/.ci/bwcVersions b/.ci/bwcVersions index cfaadc5ed1e5e..144a8b71fca39 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -25,4 +25,5 @@ BWC_VERSION: - "2.10.0" - "2.10.1" - "2.11.0" + - "2.11.1" - "2.12.0" diff --git a/libs/core/src/main/java/org/opensearch/Version.java b/libs/core/src/main/java/org/opensearch/Version.java index eef4da719994c..8d9ee73a02c1d 100644 --- a/libs/core/src/main/java/org/opensearch/Version.java +++ b/libs/core/src/main/java/org/opensearch/Version.java @@ -96,6 +96,7 @@ public class Version implements Comparable, ToXContentFragment { public static final Version V_2_10_0 = new Version(2100099, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_10_1 = new Version(2100199, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_11_0 = new Version(2110099, org.apache.lucene.util.Version.LUCENE_9_7_0); + public static final Version V_2_11_1 = new Version(2110199, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_12_0 = new Version(2120099, org.apache.lucene.util.Version.LUCENE_9_8_0); public static final Version V_3_0_0 = new Version(3000099, org.apache.lucene.util.Version.LUCENE_9_8_0); public static final Version CURRENT = V_3_0_0; From da24ca756a3140f062e2c54d8fd0be88dc62e355 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 19 Oct 2023 15:25:46 -0400 Subject: [PATCH 04/26] Performance Improvement for Datetime formats (update version checks to 2.12.0) (#10754) Signed-off-by: Andriy Redko --- .../src/main/java/org/opensearch/search/DocValueFormat.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/DocValueFormat.java b/server/src/main/java/org/opensearch/search/DocValueFormat.java index 412191c57abd8..7be51643eeb7d 100644 --- a/server/src/main/java/org/opensearch/search/DocValueFormat.java +++ b/server/src/main/java/org/opensearch/search/DocValueFormat.java @@ -243,7 +243,7 @@ public DateTime(DateFormatter formatter, ZoneId timeZone, DateFieldMapper.Resolu } public DateTime(StreamInput in) throws IOException { - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.formatter = DateFormatter.forPattern(in.readString(), in.readOptionalString()); } else { this.formatter = DateFormatter.forPattern(in.readString()); @@ -265,12 +265,12 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { - if (out.getVersion().before(Version.V_3_0_0) && formatter.equals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER)) { + if (out.getVersion().before(Version.V_2_12_0) && formatter.equals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER)) { out.writeString(DateFieldMapper.LEGACY_DEFAULT_DATE_TIME_FORMATTER.pattern()); // required for backwards compatibility } else { out.writeString(formatter.pattern()); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalString(formatter.printPattern()); } out.writeString(timeZone.getId()); From e389a09640cf4d687ae5bbe59d36f5e15624e985 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: 
Thu, 19 Oct 2023 13:31:01 -0700 Subject: [PATCH 05/26] Fix bug where retries within RemoteStoreRefreshListener cause infos/checkpoint mismatch (#10655) * Fix bug where retries within RemoteStoreRefreshListener cause mismatch between ReplicationCheckpoint and uploaded SegmentInfos. Retries within RemoteStoreRefreshListener run outside of the refresh thread. This means that concurrent refreshes may occur during syncSegments execution updating the on-reader SegmentInfos. A shard's latest ReplicationCheckpoint is computed and set in a refresh listener, but it is not guaranteed the listener has run before the retry fetches the infos or checkpoint independently. This fix ensures the listener recomputes the checkpoint while fetching the SegmentInfos. This change also ensures that we only recompute the checkpoint when necessary because it comes with an IO cost to compute StoreFileMetadata. Signed-off-by: Marc Handalian Update refresh listener to recompute checkpoint from latest infos snapshot. Signed-off-by: Marc Handalian Fix broken test case by comparing segments gen Signed-off-by: Marc Handalian spotless Signed-off-by: Marc Handalian Fix RemoteStoreRefreshListener tests Signed-off-by: Marc Handalian * add extra log Signed-off-by: Marc Handalian --------- Signed-off-by: Marc Handalian --- .../opensearch/index/shard/IndexShard.java | 67 +++++++++++-------- .../shard/RemoteStoreRefreshListener.java | 6 +- .../RemoteStoreRefreshListenerTests.java | 4 +- .../SegmentReplicationIndexShardTests.java | 27 ++++++++ 4 files changed, 70 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 9489c7d7fc1dd..5ebfd3863a6cf 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -1608,8 +1608,11 @@ public GatedCloseable acquireSafeIndexCommit() throws EngineExcepti } /** - * Compute and return the latest ReplicationCheckpoint for a particular shard. - * @return EMPTY checkpoint before the engine is opened and null for non-segrep enabled indices + * return the most recently computed ReplicationCheckpoint for a particular shard. + * The checkpoint is updated inside a refresh listener and may lag behind the SegmentInfos on the reader. + * To guarantee the checkpoint is upto date with the latest on-reader infos, use `getLatestSegmentInfosAndCheckpoint` instead. + * + * @return {@link ReplicationCheckpoint} - The most recently computed ReplicationCheckpoint. */ public ReplicationCheckpoint getLatestReplicationCheckpoint() { return replicationTracker.getLatestReplicationCheckpoint(); @@ -1628,34 +1631,12 @@ public ReplicationCheckpoint getLatestReplicationCheckpoint() { public Tuple, ReplicationCheckpoint> getLatestSegmentInfosAndCheckpoint() { assert indexSettings.isSegRepEnabled(); - Tuple, ReplicationCheckpoint> nullSegmentInfosEmptyCheckpoint = new Tuple<>( - new GatedCloseable<>(null, () -> {}), - getLatestReplicationCheckpoint() - ); - - if (getEngineOrNull() == null) { - return nullSegmentInfosEmptyCheckpoint; - } // do not close the snapshot - caller will close it. 
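        // Editor's note — illustrative caller pattern only; the names `shard`, `latest` and
        // `wrapped` are placeholders that mirror the test added later in this patch
        // (SegmentReplicationIndexShardTests):
        //   final Tuple<GatedCloseable<SegmentInfos>, ReplicationCheckpoint> latest =
        //       shard.getLatestSegmentInfosAndCheckpoint();
        //   try (GatedCloseable<SegmentInfos> wrapped = latest.v1()) {
        //       // consume latest.v2() together with wrapped.get(); closing the
        //       // GatedCloseable releases the snapshotted SegmentInfos
        //   }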
GatedCloseable snapshot = null; try { snapshot = getSegmentInfosSnapshot(); - if (snapshot.get() != null) { - SegmentInfos segmentInfos = snapshot.get(); - final Map metadataMap = store.getSegmentMetadataMap(segmentInfos); - return new Tuple<>( - snapshot, - new ReplicationCheckpoint( - this.shardId, - getOperationPrimaryTerm(), - segmentInfos.getGeneration(), - segmentInfos.getVersion(), - metadataMap.values().stream().mapToLong(StoreFileMetadata::length).sum(), - getEngine().config().getCodec().getName(), - metadataMap - ) - ); - } + final SegmentInfos segmentInfos = snapshot.get(); + return new Tuple<>(snapshot, computeReplicationCheckpoint(segmentInfos)); } catch (IOException | AlreadyClosedException e) { logger.error("Error Fetching SegmentInfos and latest checkpoint", e); if (snapshot != null) { @@ -1666,7 +1647,39 @@ public Tuple, ReplicationCheckpoint> getLatestSegme } } } - return nullSegmentInfosEmptyCheckpoint; + return new Tuple<>(new GatedCloseable<>(null, () -> {}), getLatestReplicationCheckpoint()); + } + + /** + * Compute the latest {@link ReplicationCheckpoint} from a SegmentInfos. + * This function fetches a metadata snapshot from the store that comes with an IO cost. + * We will reuse the existing stored checkpoint if it is at the same SI version. + * + * @param segmentInfos {@link SegmentInfos} infos to use to compute. + * @return {@link ReplicationCheckpoint} Checkpoint computed from the infos. + * @throws IOException When there is an error computing segment metadata from the store. + */ + ReplicationCheckpoint computeReplicationCheckpoint(SegmentInfos segmentInfos) throws IOException { + if (segmentInfos == null) { + return ReplicationCheckpoint.empty(shardId); + } + final ReplicationCheckpoint latestReplicationCheckpoint = getLatestReplicationCheckpoint(); + if (latestReplicationCheckpoint.getSegmentInfosVersion() == segmentInfos.getVersion() + && latestReplicationCheckpoint.getSegmentsGen() == segmentInfos.getGeneration()) { + return latestReplicationCheckpoint; + } + final Map metadataMap = store.getSegmentMetadataMap(segmentInfos); + final ReplicationCheckpoint checkpoint = new ReplicationCheckpoint( + this.shardId, + getOperationPrimaryTerm(), + segmentInfos.getGeneration(), + segmentInfos.getVersion(), + metadataMap.values().stream().mapToLong(StoreFileMetadata::length).sum(), + getEngine().config().getCodec().getName(), + metadataMap + ); + logger.trace("Recomputed ReplicationCheckpoint for shard {}", checkpoint); + return checkpoint; } /** diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 698e61f6f7a09..c650edc31da8d 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -181,7 +181,6 @@ private boolean syncSegments() { // in the remote store. 
return indexShard.state() != IndexShardState.STARTED || !(indexShard.getEngine() instanceof InternalEngine); } - ReplicationCheckpoint checkpoint = indexShard.getLatestReplicationCheckpoint(); beforeSegmentsSync(); long refreshTimeMs = segmentTracker.getLocalRefreshTimeMs(), refreshClockTimeMs = segmentTracker.getLocalRefreshClockTimeMs(); long refreshSeqNo = segmentTracker.getLocalRefreshSeqNo(); @@ -199,10 +198,7 @@ private boolean syncSegments() { try (GatedCloseable segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) { SegmentInfos segmentInfos = segmentInfosGatedCloseable.get(); - assert segmentInfos.getGeneration() == checkpoint.getSegmentsGen() : "SegmentInfos generation: " - + segmentInfos.getGeneration() - + " does not match metadata generation: " - + checkpoint.getSegmentsGen(); + final ReplicationCheckpoint checkpoint = indexShard.computeReplicationCheckpoint(segmentInfos); // Capture replication checkpoint before uploading the segments as upload can take some time and checkpoint can // move. long lastRefreshedCheckpoint = ((InternalEngine) indexShard.getEngine()).lastRefreshedCheckpoint(); diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index 5a13f57db2c87..51814283c5eb3 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -520,8 +520,8 @@ private Tuple mockIn if (counter.incrementAndGet() <= succeedOnAttempt) { throw new RuntimeException("Inducing failure in upload"); } - return indexShard.getLatestSegmentInfosAndCheckpoint(); - })).when(shard).getLatestSegmentInfosAndCheckpoint(); + return indexShard.getLatestReplicationCheckpoint(); + })).when(shard).computeReplicationCheckpoint(any()); doAnswer(invocation -> { if (Objects.nonNull(successLatch)) { diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index 52f28aead533d..eab38bfe5c64d 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -925,6 +925,33 @@ public void testSnapshotWhileFailoverIncomplete() throws Exception { } } + public void testReuseReplicationCheckpointWhenLatestInfosIsUnChanged() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, indexMapping, new NRTReplicationEngineFactory(), createTempDir())) { + final IndexShard primaryShard = shards.getPrimary(); + shards.startAll(); + shards.indexDocs(10); + shards.refresh("test"); + replicateSegments(primaryShard, shards.getReplicas()); + shards.assertAllEqual(10); + final ReplicationCheckpoint latestReplicationCheckpoint = primaryShard.getLatestReplicationCheckpoint(); + try (GatedCloseable segmentInfosSnapshot = primaryShard.getSegmentInfosSnapshot()) { + assertEquals(latestReplicationCheckpoint, primaryShard.computeReplicationCheckpoint(segmentInfosSnapshot.get())); + } + final Tuple, ReplicationCheckpoint> latestSegmentInfosAndCheckpoint = primaryShard + .getLatestSegmentInfosAndCheckpoint(); + try (final GatedCloseable closeable = latestSegmentInfosAndCheckpoint.v1()) { + assertEquals(latestReplicationCheckpoint, primaryShard.computeReplicationCheckpoint(closeable.get())); + } + 
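+            // Editor's note: both assertions above must agree because
+            // computeReplicationCheckpoint() reuses the stored checkpoint rather than
+            // recomputing it whenever the SegmentInfos version and generation are
+            // unchanged since the last refresh.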
} + } + + public void testComputeReplicationCheckpointNullInfosReturnsEmptyCheckpoint() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, indexMapping, new NRTReplicationEngineFactory(), createTempDir())) { + final IndexShard primaryShard = shards.getPrimary(); + assertEquals(ReplicationCheckpoint.empty(primaryShard.shardId), primaryShard.computeReplicationCheckpoint(null)); + } + } + private SnapshotShardsService getSnapshotShardsService(IndexShard replicaShard) { final TransportService transportService = mock(TransportService.class); when(transportService.getThreadPool()).thenReturn(threadPool); From 781968b2e53f3214b73d4d8e7c1baa572b334f27 Mon Sep 17 00:00:00 2001 From: Siddhant Deshmukh Date: Thu, 19 Oct 2023 14:39:29 -0700 Subject: [PATCH 06/26] Categorize search queries by type and log query shape (#10724) * Search Query Categorizor initial skeleton using QueryBuilderVisitor Signed-off-by: Siddhant Deshmukh * Integrate metrics framework, add counters and log query shape Signed-off-by: Siddhant Deshmukh * Update changelog Signed-off-by: Siddhant Deshmukh * Add level attribute to QueryBuilderVisitor and as a tag in Counters Signed-off-by: Siddhant Deshmukh * Log query shape as debug log Signed-off-by: Siddhant Deshmukh * Integrate metrics framework, refactor code and update tests Signed-off-by: Siddhant Deshmukh * Fix build Signed-off-by: Siddhant Deshmukh * Add javadocs Signed-off-by: Siddhant Deshmukh * Minor fix Signed-off-by: Siddhant Deshmukh * Spotless check changes Signed-off-by: Siddhant Deshmukh * Address comments, add agg and sort counters, add feature flag, refactoring Signed-off-by: Siddhant Deshmukh * Build fix Signed-off-by: Siddhant Deshmukh * spotless check Signed-off-by: Siddhant Deshmukh * Fix tests Signed-off-by: Siddhant Deshmukh * Dynamic feature flag with callback Signed-off-by: Siddhant Deshmukh * Minor fix Signed-off-by: Siddhant Deshmukh * Add initialization in callback Signed-off-by: Siddhant Deshmukh * Address comments Signed-off-by: Siddhant Deshmukh * Add exception handling Signed-off-by: Siddhant Deshmukh * Refactoring and renaming Signed-off-by: Siddhant Deshmukh * Minor fix Signed-off-by: Siddhant Deshmukh * Fix changelog and minor refactoring Signed-off-by: Siddhant Deshmukh * Address review comments Signed-off-by: Siddhant Deshmukh * Add unit tests Signed-off-by: Siddhant Deshmukh * Address review comments and add complex query unit test Signed-off-by: Siddhant Deshmukh * Add sort order as a tag to sort counter Signed-off-by: Siddhant Deshmukh * Address review comments Signed-off-by: Siddhant Deshmukh * Address final comments Signed-off-by: Siddhant Deshmukh * Build fix Signed-off-by: Siddhant Deshmukh * Fix build tests failure Signed-off-by: Siddhant Deshmukh * Minor fix Signed-off-by: Siddhant Deshmukh * Minor fix Signed-off-by: Siddhant Deshmukh * Empty commit Signed-off-by: Siddhant Deshmukh * Remove extra newline Signed-off-by: Michael Froh * Empty commit Signed-off-by: Siddhant Deshmukh --------- Signed-off-by: Siddhant Deshmukh Signed-off-by: Michael Froh Co-authored-by: Michael Froh --- CHANGELOG.md | 1 + .../action/search/SearchQueryCategorizer.java | 81 +++++++ .../SearchQueryCategorizingVisitor.java | 73 ++++++ .../action/search/SearchQueryCounters.java | 117 +++++++++ .../action/search/TransportSearchAction.java | 36 ++- .../common/settings/ClusterSettings.java | 1 + .../index/query/QueryShapeVisitor.java | 86 +++++++ .../search/SearchQueryCategorizerTests.java | 228 ++++++++++++++++++ 
.../index/query/QueryShapeVisitorTests.java | 31 +++ .../snapshots/SnapshotResiliencyTests.java | 4 +- 10 files changed, 656 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java create mode 100644 server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java create mode 100644 server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java create mode 100644 server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java create mode 100644 server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java create mode 100644 server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ad18b94f31b7..552c277789dd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added - Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351)) - [Remote Store] Add repository stats for remote store([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) +- Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java new file mode 100644 index 0000000000000..9cbe2d2ffcb7d --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilderVisitor; +import org.opensearch.index.query.QueryShapeVisitor; +import org.opensearch.search.aggregations.AggregatorFactories; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.SortBuilder; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.List; +import java.util.ListIterator; + +/** + * Class to categorize the search queries based on the type and increment the relevant counters. + * Class also logs the query shape. 
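+ * Illustrative usage (editor's sketch): call categorize(searchRequest.source()) once per
+ * search request, as TransportSearchAction does further below; each query builder in the
+ * tree increments one type counter, and agg/sort counters are incremented from the source.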
+ */ +final class SearchQueryCategorizer { + + private static final Logger log = LogManager.getLogger(SearchQueryCategorizer.class); + + final SearchQueryCounters searchQueryCounters; + + public SearchQueryCategorizer(MetricsRegistry metricsRegistry) { + searchQueryCounters = new SearchQueryCounters(metricsRegistry); + } + + public void categorize(SearchSourceBuilder source) { + QueryBuilder topLevelQueryBuilder = source.query(); + + logQueryShape(topLevelQueryBuilder); + incrementQueryTypeCounters(topLevelQueryBuilder); + incrementQueryAggregationCounters(source.aggregations()); + incrementQuerySortCounters(source.sorts()); + } + + private void incrementQuerySortCounters(List> sorts) { + if (sorts != null && sorts.size() > 0) { + for (ListIterator> it = sorts.listIterator(); it.hasNext();) { + SortBuilder sortBuilder = it.next(); + String sortOrder = sortBuilder.order().toString(); + searchQueryCounters.sortCounter.add(1, Tags.create().addTag("sort_order", sortOrder)); + } + } + } + + private void incrementQueryAggregationCounters(AggregatorFactories.Builder aggregations) { + if (aggregations != null) { + searchQueryCounters.aggCounter.add(1); + } + } + + private void incrementQueryTypeCounters(QueryBuilder topLevelQueryBuilder) { + if (topLevelQueryBuilder == null) { + return; + } + QueryBuilderVisitor searchQueryVisitor = new SearchQueryCategorizingVisitor(searchQueryCounters); + topLevelQueryBuilder.visit(searchQueryVisitor); + } + + private void logQueryShape(QueryBuilder topLevelQueryBuilder) { + if (topLevelQueryBuilder == null) { + return; + } + QueryShapeVisitor shapeVisitor = new QueryShapeVisitor(); + topLevelQueryBuilder.visit(shapeVisitor); + log.debug("Query shape : {}", shapeVisitor.prettyPrintTree(" ")); + } + +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java new file mode 100644 index 0000000000000..98f0169e69a5c --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.apache.lucene.search.BooleanClause; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilderVisitor; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.RegexpQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.telemetry.metrics.tags.Tags; + +/** + * Class to visit the querybuilder tree and also track the level information. + * Increments the counters related to Search Query type. 
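+ * The "level" tag attached to each counter records the depth of the builder in the query
+ * tree: 0 for the top-level query, incremented by one for every nested child visitor.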
+ */ +final class SearchQueryCategorizingVisitor implements QueryBuilderVisitor { + private static final String LEVEL_TAG = "level"; + private final int level; + private final SearchQueryCounters searchQueryCounters; + + public SearchQueryCategorizingVisitor(SearchQueryCounters searchQueryCounters) { + this(searchQueryCounters, 0); + } + + private SearchQueryCategorizingVisitor(SearchQueryCounters counters, int level) { + this.searchQueryCounters = counters; + this.level = level; + } + + public void accept(QueryBuilder qb) { + if (qb instanceof BoolQueryBuilder) { + searchQueryCounters.boolCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof FunctionScoreQueryBuilder) { + searchQueryCounters.functionScoreCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof MatchQueryBuilder) { + searchQueryCounters.matchCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof MatchPhraseQueryBuilder) { + searchQueryCounters.matchPhrasePrefixCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof MultiMatchQueryBuilder) { + searchQueryCounters.multiMatchCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof QueryStringQueryBuilder) { + searchQueryCounters.queryStringQueryCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof RangeQueryBuilder) { + searchQueryCounters.rangeCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof RegexpQueryBuilder) { + searchQueryCounters.regexCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof TermQueryBuilder) { + searchQueryCounters.termCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof WildcardQueryBuilder) { + searchQueryCounters.wildcardCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else { + searchQueryCounters.otherQueryCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } + } + + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + return new SearchQueryCategorizingVisitor(searchQueryCounters, level + 1); + } +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java new file mode 100644 index 0000000000000..7e0259af07701 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.MetricsRegistry; + +/** + * Class contains all the Counters related to search query types. 
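+ * Each counter is created once through MetricsRegistry#createCounter(name, description, unit)
+ * in the constructor below and is incremented at categorization time via Counter#add.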
+ */ +final class SearchQueryCounters { + private static final String UNIT = "1"; + private final MetricsRegistry metricsRegistry; + + // Counters related to Query types + public final Counter aggCounter; + public final Counter boolCounter; + public final Counter functionScoreCounter; + public final Counter matchCounter; + public final Counter matchPhrasePrefixCounter; + public final Counter multiMatchCounter; + public final Counter otherQueryCounter; + public final Counter queryStringQueryCounter; + public final Counter rangeCounter; + public final Counter regexCounter; + + public final Counter sortCounter; + public final Counter skippedCounter; + public final Counter termCounter; + public final Counter totalCounter; + public final Counter wildcardCounter; + + public SearchQueryCounters(MetricsRegistry metricsRegistry) { + this.metricsRegistry = metricsRegistry; + this.aggCounter = metricsRegistry.createCounter( + "search.query.type.agg.count", + "Counter for the number of top level agg search queries", + UNIT + ); + this.boolCounter = metricsRegistry.createCounter( + "search.query.type.bool.count", + "Counter for the number of top level and nested bool search queries", + UNIT + ); + this.functionScoreCounter = metricsRegistry.createCounter( + "search.query.type.functionscore.count", + "Counter for the number of top level and nested function score search queries", + UNIT + ); + this.matchCounter = metricsRegistry.createCounter( + "search.query.type.match.count", + "Counter for the number of top level and nested match search queries", + UNIT + ); + this.matchPhrasePrefixCounter = metricsRegistry.createCounter( + "search.query.type.matchphrase.count", + "Counter for the number of top level and nested match phrase prefix search queries", + UNIT + ); + this.multiMatchCounter = metricsRegistry.createCounter( + "search.query.type.multimatch.count", + "Counter for the number of top level and nested multi match search queries", + UNIT + ); + this.otherQueryCounter = metricsRegistry.createCounter( + "search.query.type.other.count", + "Counter for the number of top level and nested search queries that do not match any other categories", + UNIT + ); + this.queryStringQueryCounter = metricsRegistry.createCounter( + "search.query.type.querystringquery.count", + "Counter for the number of top level and nested queryStringQuery search queries", + UNIT + ); + this.rangeCounter = metricsRegistry.createCounter( + "search.query.type.range.count", + "Counter for the number of top level and nested range search queries", + UNIT + ); + this.regexCounter = metricsRegistry.createCounter( + "search.query.type.regex.count", + "Counter for the number of top level and nested regex search queries", + UNIT + ); + this.skippedCounter = metricsRegistry.createCounter( + "search.query.type.skipped.count", + "Counter for the number queries skipped due to error", + UNIT + ); + this.sortCounter = metricsRegistry.createCounter( + "search.query.type.sort.count", + "Counter for the number of top level sort search queries", + UNIT + ); + this.termCounter = metricsRegistry.createCounter( + "search.query.type.term.count", + "Counter for the number of top level and nested term search queries", + UNIT + ); + this.totalCounter = metricsRegistry.createCounter( + "search.query.type.total.count", + "Counter for the number of top level and nested search queries", + UNIT + ); + this.wildcardCounter = metricsRegistry.createCounter( + "search.query.type.wildcard.count", + "Counter for the number of top level and nested wildcard search 
queries", + UNIT + ); + } +} diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 284f71bd9da62..a6fb8453af4ff 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -88,6 +88,7 @@ import org.opensearch.search.profile.SearchProfileShardResults; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.RemoteClusterAware; import org.opensearch.transport.RemoteClusterService; @@ -137,6 +138,13 @@ public class TransportSearchAction extends HandledTransportAction SEARCH_QUERY_METRICS_ENABLED_SETTING = Setting.boolSetting( + "search.query.metrics.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + // cluster level setting for timeout based search cancellation. If search request level parameter is present then that will take // precedence over the cluster setting value public static final String SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING_KEY = "search.cancel_after_time_interval"; @@ -177,8 +185,14 @@ public class TransportSearchAction extends HandledTransportAction) SearchRequest::new); this.client = client; @@ -211,6 +226,17 @@ public TransportSearchAction( this.isRequestStatsEnabled = clusterService.getClusterSettings().get(SEARCH_REQUEST_STATS_ENABLED); clusterService.getClusterSettings().addSettingsUpdateConsumer(SEARCH_REQUEST_STATS_ENABLED, this::setIsRequestStatsEnabled); this.searchRequestStats = searchRequestStats; + this.metricsRegistry = metricsRegistry; + this.searchQueryMetricsEnabled = clusterService.getClusterSettings().get(SEARCH_QUERY_METRICS_ENABLED_SETTING); + clusterService.getClusterSettings() + .addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, this::setSearchQueryMetricsEnabled); + } + + private void setSearchQueryMetricsEnabled(boolean searchQueryMetricsEnabled) { + this.searchQueryMetricsEnabled = searchQueryMetricsEnabled; + if ((this.searchQueryMetricsEnabled == true) && this.searchQueryCategorizer == null) { + this.searchQueryCategorizer = new SearchQueryCategorizer(metricsRegistry); + } } private void setIsRequestStatsEnabled(boolean isRequestStatsEnabled) { @@ -489,6 +515,14 @@ private void executeRequest( return; } + if (searchQueryMetricsEnabled) { + try { + searchQueryCategorizer.categorize(searchRequest.source()); + } catch (Exception e) { + logger.error("Error while trying to categorize the query.", e); + } + } + ActionListener rewriteListener = ActionListener.wrap(source -> { if (source != searchRequest.source()) { // only set it if it changed - we don't allow null values to be set but it might be already null. 
this way we catch diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 90f91dcb7c553..76883c200542e 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -377,6 +377,7 @@ public void apply(Settings value, Settings current, Settings previous) { TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING, TransportSearchAction.SEARCH_REQUEST_STATS_ENABLED, TransportSearchAction.SEARCH_PHASE_TOOK_ENABLED, + TransportSearchAction.SEARCH_QUERY_METRICS_ENABLED_SETTING, RemoteClusterService.REMOTE_CLUSTER_SKIP_UNAVAILABLE, SniffConnectionStrategy.REMOTE_CONNECTIONS_PER_CLUSTER, RemoteClusterService.REMOTE_INITIAL_CONNECTION_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java b/server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java new file mode 100644 index 0000000000000..3ba13bc7a2da4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.search.BooleanClause; +import org.opensearch.common.SetOnce; + +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Class to traverse the QueryBuilder tree and capture the query shape + */ +public final class QueryShapeVisitor implements QueryBuilderVisitor { + private final SetOnce queryType = new SetOnce<>(); + private final Map> childVisitors = new EnumMap<>(BooleanClause.Occur.class); + + @Override + public void accept(QueryBuilder qb) { + queryType.set(qb.getName()); + } + + @Override + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + // Should get called once per Occur value + if (childVisitors.containsKey(occur)) { + throw new IllegalStateException("child visitor already called for " + occur); + } + final List childVisitorList = new ArrayList<>(); + QueryBuilderVisitor childVisitorWrapper = new QueryBuilderVisitor() { + QueryShapeVisitor currentChild; + + @Override + public void accept(QueryBuilder qb) { + currentChild = new QueryShapeVisitor(); + childVisitorList.add(currentChild); + currentChild.accept(qb); + } + + @Override + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + return currentChild.getChildVisitor(occur); + } + }; + childVisitors.put(occur, childVisitorList); + return childVisitorWrapper; + } + + String toJson() { + StringBuilder outputBuilder = new StringBuilder("{\"type\":\"").append(queryType.get()).append("\""); + for (Map.Entry> entry : childVisitors.entrySet()) { + outputBuilder.append(",\"").append(entry.getKey().name().toLowerCase(Locale.ROOT)).append("\"["); + boolean first = true; + for (QueryShapeVisitor child : entry.getValue()) { + if (!first) { + outputBuilder.append(","); + } + outputBuilder.append(child.toJson()); + first = false; + } + outputBuilder.append("]"); + } + outputBuilder.append("}"); + return outputBuilder.toString(); + } + + public String prettyPrintTree(String indent) { + StringBuilder outputBuilder = new 
StringBuilder(indent).append(queryType.get()).append("\n"); + for (Map.Entry> entry : childVisitors.entrySet()) { + outputBuilder.append(indent).append(" ").append(entry.getKey().name().toLowerCase(Locale.ROOT)).append(":\n"); + for (QueryShapeVisitor child : entry.getValue()) { + outputBuilder.append(child.prettyPrintTree(indent + " ")); + } + } + return outputBuilder.toString(); + } +} diff --git a/server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java b/server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java new file mode 100644 index 0000000000000..a2e301143d694 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java @@ -0,0 +1,228 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.BoostingQueryBuilder; +import org.opensearch.index.query.MatchNoneQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.RegexpQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.ScoreSortBuilder; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.Arrays; + +import org.mockito.Mockito; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; + +public final class SearchQueryCategorizerTests extends OpenSearchTestCase { + + private MetricsRegistry metricsRegistry; + + private SearchQueryCategorizer searchQueryCategorizer; + + @Before + public void setup() { + metricsRegistry = mock(MetricsRegistry.class); + when(metricsRegistry.createCounter(any(String.class), any(String.class), any(String.class))).thenAnswer( + invocation -> mock(Counter.class) + ); + searchQueryCategorizer = new SearchQueryCategorizer(metricsRegistry); + } + + public void testAggregationsQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.aggregation( + new MultiTermsAggregationBuilder("agg1").terms( + Arrays.asList( + new MultiTermsValuesSourceConfig.Builder().setFieldName("username").build(), + new MultiTermsValuesSourceConfig.Builder().setFieldName("rating").build() + ) + ) + ); + sourceBuilder.size(0); + + searchQueryCategorizer.categorize(sourceBuilder); + 
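+        // Editor's note: aggregations are counted without a level tag (aggCounter.add(1)
+        // in SearchQueryCategorizer), so the verification below expects no Tags argument.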
+ Mockito.verify(searchQueryCategorizer.searchQueryCounters.aggCounter).add(eq(1.0d)); + } + + public void testBoolQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new BoolQueryBuilder().must(new MatchQueryBuilder("searchText", "fox"))); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.boolCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testFunctionScoreQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.functionScoreCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testMatchQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(QueryBuilders.matchQuery("tags", "php")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testMatchPhraseQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(QueryBuilders.matchPhraseQuery("tags", "php")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchPhrasePrefixCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testMultiMatchQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new MultiMatchQueryBuilder("foo bar", "myField")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.multiMatchCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testOtherQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + BoostingQueryBuilder queryBuilder = new BoostingQueryBuilder( + new TermQueryBuilder("unmapped_field", "foo"), + new MatchNoneQueryBuilder() + ); + sourceBuilder.query(queryBuilder); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.otherQueryCounter, times(2)).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.termCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testQueryStringQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + QueryStringQueryBuilder queryBuilder = new QueryStringQueryBuilder("foo:*"); + sourceBuilder.query(queryBuilder); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.queryStringQueryCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testRangeQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + RangeQueryBuilder rangeQuery = new RangeQueryBuilder("date"); + rangeQuery.gte("1970-01-01"); + rangeQuery.lt("1982-01-01"); + sourceBuilder.query(rangeQuery); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.rangeCounter).add(eq(1.0d), any(Tags.class)); + } + + 
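+    // Editor's note: RegexpQueryBuilder maps to the dedicated regexCounter in the
+    // categorizing visitor; builders with no explicit mapping fall into otherQueryCounter.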
public void testRegexQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(new RegexpQueryBuilder("field", "text")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.regexCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testSortQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(QueryBuilders.matchQuery("tags", "ruby")); + sourceBuilder.sort("creationDate", SortOrder.DESC); + sourceBuilder.sort(new ScoreSortBuilder()); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.sortCounter, times(2)).add(eq(1.0d), any(Tags.class)); + } + + public void testTermQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(QueryBuilders.termQuery("field", "value2")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.termCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testWildcardQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new WildcardQueryBuilder("field", "text")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.wildcardCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testComplexQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + + TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("field", "value2"); + MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("tags", "php"); + RegexpQueryBuilder regexpQueryBuilder = new RegexpQueryBuilder("field", "text"); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder().must(termQueryBuilder) + .filter(matchQueryBuilder) + .should(regexpQueryBuilder); + sourceBuilder.query(boolQueryBuilder); + sourceBuilder.aggregation(new RangeAggregationBuilder("agg1").field("num")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.termCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.regexCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.boolCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.aggCounter).add(eq(1.0d)); + } +} diff --git a/server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java b/server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java new file mode 100644 index 0000000000000..18b814aec61c2 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.query; + +import org.opensearch.test.OpenSearchTestCase; + +import static org.junit.Assert.assertEquals; + +public final class QueryShapeVisitorTests extends OpenSearchTestCase { + public void testQueryShapeVisitor() { + QueryBuilder builder = new BoolQueryBuilder().must(new TermQueryBuilder("foo", "bar")) + .filter(new ConstantScoreQueryBuilder(new RangeQueryBuilder("timestamp").from("12345677").to("2345678"))) + .should( + new BoolQueryBuilder().must(new MatchQueryBuilder("text", "this is some text")) + .mustNot(new RegexpQueryBuilder("color", "red.*")) + ) + .must(new TermsQueryBuilder("genre", "action", "drama", "romance")); + QueryShapeVisitor shapeVisitor = new QueryShapeVisitor(); + builder.visit(shapeVisitor); + assertEquals( + "{\"type\":\"bool\",\"must\"[{\"type\":\"term\"},{\"type\":\"terms\"}],\"filter\"[{\"type\":\"constant_score\",\"filter\"[{\"type\":\"range\"}]}],\"should\"[{\"type\":\"bool\",\"must\"[{\"type\":\"match\"}],\"must_not\"[{\"type\":\"regexp\"}]}]}", + shapeVisitor.toJson() + ); + } +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 97c5d23831965..2f9f38d18a064 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -222,6 +222,7 @@ import org.opensearch.search.query.QueryPhase; import org.opensearch.snapshots.mockstore.MockEventuallyConsistentRepository; import org.opensearch.tasks.TaskResourceTrackingService; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.disruption.DisruptableMockTransport; @@ -2302,7 +2303,8 @@ public void onFailure(final Exception e) { List.of(), client ), - null + null, + NoopMetricsRegistry.INSTANCE ) ); actions.put( From 200ad5d28a577877be530ecab507601898025c5c Mon Sep 17 00:00:00 2001 From: Ticheng Lin <51488860+ticheng-aws@users.noreply.github.com> Date: Thu, 19 Oct 2023 19:58:13 -0700 Subject: [PATCH 07/26] Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight (#10352) * Fix timer race condition in profile rewrite and create weight for concurrent segment search (#10352) Signed-off-by: Ticheng Lin * Refactor and work on the PR comments (#10352) Signed-off-by: Ticheng Lin --------- Signed-off-by: Ticheng Lin --- CHANGELOG.md | 1 + .../search/profile/query/QueryProfilerIT.java | 157 +++++++++++++++++- .../opensearch/search/profile/Profilers.java | 7 +- .../org/opensearch/search/profile/Timer.java | 12 ++ .../query/AbstractQueryProfileTree.java | 5 +- .../ConcurrentQueryProfileBreakdown.java | 27 ++- .../query/ConcurrentQueryProfiler.java | 134 +++++++++++++++ .../search/profile/query/QueryProfiler.java | 14 +- .../ConcurrentQueryProfileBreakdownTests.java | 52 ++++++ .../query/ConcurrentQueryProfilerTests.java | 36 ++++ .../profile/query/QueryProfilerTests.java | 16 +- 11 files changed, 438 insertions(+), 23 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java create mode 100644 server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 552c277789dd7..5c52c43a35b8d 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351)) - [Remote Store] Add repository stats for remote store([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) +- Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java b/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java index 5f794d2abf878..ef73438114079 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java @@ -32,6 +32,8 @@ package org.opensearch.search.profile.query; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + import org.apache.lucene.tests.util.English; import org.opensearch.action.index.IndexRequestBuilder; import org.opensearch.action.search.MultiSearchResponse; @@ -40,20 +42,23 @@ import org.opensearch.action.search.SearchType; import org.opensearch.action.search.ShardSearchFailure; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; import org.opensearch.search.profile.ProfileResult; import org.opensearch.search.profile.ProfileShardResult; import org.opensearch.search.sort.SortOrder; -import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.ParameterizedOpenSearchIntegTestCase; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; import static org.opensearch.search.profile.query.RandomQueryGenerator.randomQueryBuilder; import static org.hamcrest.Matchers.emptyOrNullString; import static org.hamcrest.Matchers.equalTo; @@ -61,8 +66,32 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; + +public class QueryProfilerIT extends ParameterizedOpenSearchIntegTestCase { + private final boolean concurrentSearchEnabled; + private static final String MAX_PREFIX = "max_"; + private static final String MIN_PREFIX = "min_"; + private static final String AVG_PREFIX = "avg_"; + private static final String TIMING_TYPE_COUNT_SUFFIX = "_count"; + + public QueryProfilerIT(Settings settings, boolean concurrentSearchEnabled) { + super(settings); + this.concurrentSearchEnabled = concurrentSearchEnabled; + } -public class QueryProfilerIT extends OpenSearchIntegTestCase { + @ParametersFactory + public static Collection 
parameters() { + return Arrays.asList( + new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build(), false }, + new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build(), true } + ); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.CONCURRENT_SEGMENT_SEARCH, "true").build(); + } /** * This test simply checks to make sure nothing crashes. Test indexes 100-150 documents, @@ -229,6 +258,7 @@ public void testSimpleMatch() throws Exception { assertEquals(result.getLuceneDescription(), "field1:one"); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -271,6 +301,7 @@ public void testBool() throws Exception { assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); assertEquals(result.getProfiledChildren().size(), 2); + assertQueryProfileResult(result); // Check the children List children = result.getProfiledChildren(); @@ -282,12 +313,14 @@ public void testBool() throws Exception { assertThat(childProfile.getTime(), greaterThan(0L)); assertNotNull(childProfile.getTimeBreakdown()); assertEquals(childProfile.getProfiledChildren().size(), 0); + assertQueryProfileResult(childProfile); childProfile = children.get(1); assertEquals(childProfile.getQueryName(), "TermQuery"); assertEquals(childProfile.getLuceneDescription(), "field1:two"); assertThat(childProfile.getTime(), greaterThan(0L)); assertNotNull(childProfile.getTimeBreakdown()); + assertQueryProfileResult(childProfile); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -330,6 +363,7 @@ public void testEmptyBool() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -375,6 +409,7 @@ public void testCollapsingBool() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -415,6 +450,90 @@ public void testBoosting() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); + } + + CollectorResult result = searchProfiles.getCollectorResult(); + assertThat(result.getName(), is(not(emptyOrNullString()))); + assertThat(result.getTime(), greaterThan(0L)); + } + } + } + + public void testSearchLeafForItsLeavesAndRewriteQuery() throws Exception { + createIndex("test"); + ensureGreen(); + + int numDocs = 122; + IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; + for (int i = 0; i < numDocs; i++) { + docs[i] = client().prepareIndex("test").setId(String.valueOf(i)).setSource("field1", English.intToEnglish(i), "field2", i); + } + + List terms = Arrays.asList("zero", "zero", "one"); + + indexRandom(true, docs); + + refresh(); + + QueryBuilder q = QueryBuilders.boostingQuery( + QueryBuilders.idsQuery().addIds(String.valueOf(randomInt()), String.valueOf(randomInt())), + QueryBuilders.termsQuery("field1", terms) + 
).boost(randomFloat()).negativeBoost(randomFloat()); + logger.info("Query: {}", q); + + SearchResponse resp = client().prepareSearch() + .setQuery(q) + .setTrackTotalHits(true) + .setProfile(true) + .setSearchType(SearchType.QUERY_THEN_FETCH) + .get(); + + assertNotNull("Profile response element should not be null", resp.getProfileResults()); + assertThat("Profile response should not be an empty array", resp.getProfileResults().size(), not(0)); + + for (Map.Entry shardResult : resp.getProfileResults().entrySet()) { + assertThat(shardResult.getValue().getNetworkTime().getInboundNetworkTime(), greaterThanOrEqualTo(0L)); + assertThat(shardResult.getValue().getNetworkTime().getOutboundNetworkTime(), greaterThanOrEqualTo(0L)); + for (QueryProfileShardResult searchProfiles : shardResult.getValue().getQueryProfileResults()) { + List results = searchProfiles.getQueryResults(); + for (ProfileResult result : results) { + assertNotNull(result.getQueryName()); + assertNotNull(result.getLuceneDescription()); + assertThat(result.getTime(), greaterThan(0L)); + Map breakdown = result.getTimeBreakdown(); + Long maxSliceTime = result.getMaxSliceTime(); + Long minSliceTime = result.getMinSliceTime(); + Long avgSliceTime = result.getAvgSliceTime(); + if (concurrentSearchEnabled && results.get(0).equals(result)) { + assertNotNull(maxSliceTime); + assertNotNull(minSliceTime); + assertNotNull(avgSliceTime); + assertThat(breakdown.size(), equalTo(66)); + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + if (queryTimingType != QueryTimingType.CREATE_WEIGHT) { + String maxTimingType = MAX_PREFIX + queryTimingType; + String minTimingType = MIN_PREFIX + queryTimingType; + String avgTimingType = AVG_PREFIX + queryTimingType; + assertNotNull(breakdown.get(maxTimingType)); + assertNotNull(breakdown.get(minTimingType)); + assertNotNull(breakdown.get(avgTimingType)); + assertNotNull(breakdown.get(maxTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(minTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(avgTimingType + TIMING_TYPE_COUNT_SUFFIX)); + } + } + } else if (concurrentSearchEnabled) { + assertThat(maxSliceTime, equalTo(0L)); + assertThat(minSliceTime, equalTo(0L)); + assertThat(avgSliceTime, equalTo(0L)); + assertThat(breakdown.size(), equalTo(27)); + } else { + assertThat(maxSliceTime, is(nullValue())); + assertThat(minSliceTime, is(nullValue())); + assertThat(avgSliceTime, is(nullValue())); + assertThat(breakdown.size(), equalTo(27)); + } } CollectorResult result = searchProfiles.getCollectorResult(); @@ -455,6 +574,7 @@ public void testDisMaxRange() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -494,6 +614,7 @@ public void testRange() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -547,6 +668,7 @@ public void testPhrase() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -579,4 +701,35 @@ public void testNoProfile() throws Exception { 
assertThat("Profile response element should be an empty map", resp.getProfileResults().size(), equalTo(0)); } + private void assertQueryProfileResult(ProfileResult result) { + Map breakdown = result.getTimeBreakdown(); + Long maxSliceTime = result.getMaxSliceTime(); + Long minSliceTime = result.getMinSliceTime(); + Long avgSliceTime = result.getAvgSliceTime(); + if (concurrentSearchEnabled) { + assertNotNull(maxSliceTime); + assertNotNull(minSliceTime); + assertNotNull(avgSliceTime); + assertThat(breakdown.size(), equalTo(66)); + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + if (queryTimingType != QueryTimingType.CREATE_WEIGHT) { + String maxTimingType = MAX_PREFIX + queryTimingType; + String minTimingType = MIN_PREFIX + queryTimingType; + String avgTimingType = AVG_PREFIX + queryTimingType; + assertNotNull(breakdown.get(maxTimingType)); + assertNotNull(breakdown.get(minTimingType)); + assertNotNull(breakdown.get(avgTimingType)); + assertNotNull(breakdown.get(maxTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(minTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(avgTimingType + TIMING_TYPE_COUNT_SUFFIX)); + } + } + } else { + assertThat(maxSliceTime, is(nullValue())); + assertThat(minSliceTime, is(nullValue())); + assertThat(avgSliceTime, is(nullValue())); + assertThat(breakdown.size(), equalTo(27)); + } + } + } diff --git a/server/src/main/java/org/opensearch/search/profile/Profilers.java b/server/src/main/java/org/opensearch/search/profile/Profilers.java index 8e87c7ff4acd4..68cf05c988b5b 100644 --- a/server/src/main/java/org/opensearch/search/profile/Profilers.java +++ b/server/src/main/java/org/opensearch/search/profile/Profilers.java @@ -35,6 +35,9 @@ import org.opensearch.search.internal.ContextIndexSearcher; import org.opensearch.search.profile.aggregation.AggregationProfiler; import org.opensearch.search.profile.aggregation.ConcurrentAggregationProfiler; +import org.opensearch.search.profile.query.ConcurrentQueryProfileTree; +import org.opensearch.search.profile.query.ConcurrentQueryProfiler; +import org.opensearch.search.profile.query.InternalQueryProfileTree; import org.opensearch.search.profile.query.QueryProfiler; import java.util.ArrayList; @@ -64,7 +67,9 @@ public Profilers(ContextIndexSearcher searcher, boolean isConcurrentSegmentSearc /** Switch to a new profile. */ public QueryProfiler addQueryProfiler() { - QueryProfiler profiler = new QueryProfiler(isConcurrentSegmentSearchEnabled); + QueryProfiler profiler = isConcurrentSegmentSearchEnabled + ? 
new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); queryProfilers.add(profiler); return profiler; diff --git a/server/src/main/java/org/opensearch/search/profile/Timer.java b/server/src/main/java/org/opensearch/search/profile/Timer.java index 172762cabeb6a..864c689cf7fa0 100644 --- a/server/src/main/java/org/opensearch/search/profile/Timer.java +++ b/server/src/main/java/org/opensearch/search/profile/Timer.java @@ -53,6 +53,18 @@ public class Timer { private boolean doTiming; private long timing, count, lastCount, start, earliestTimerStartTime; + public Timer() { + this(0, 0, 0, 0, 0); + } + + public Timer(long timing, long count, long lastCount, long start, long earliestTimerStartTime) { + this.timing = timing; + this.count = count; + this.lastCount = lastCount; + this.start = start; + this.earliestTimerStartTime = earliestTimerStartTime; + } + /** pkg-private for testing */ long nanoTime() { return System.nanoTime(); diff --git a/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java b/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java index 8e825def13f5d..2f5d632ee2d87 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java +++ b/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java @@ -54,14 +54,11 @@ public void startRewriteTime() { * startRewriteTime() must be called for a particular context prior to calling * stopAndAddRewriteTime(), otherwise the elapsed time will be negative and * nonsensical - * - * @return The elapsed time */ - public long stopAndAddRewriteTime() { + public void stopAndAddRewriteTime() { long time = Math.max(1, System.nanoTime() - rewriteScratch); rewriteTime += time; rewriteScratch = 0; - return time; } public long getRewriteTime() { diff --git a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java index e567fdd2d436c..59ef01f9f947a 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java +++ b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java @@ -70,7 +70,7 @@ public Map toBreakdownMap() { ); final long createWeightTime = topLevelBreakdownMapWithWeightTime.get(QueryTimingType.CREATE_WEIGHT.toString()); - if (sliceCollectorsToLeaves.isEmpty() || contexts.isEmpty()) { + if (contexts.isEmpty()) { // If there are no leaf contexts, then return the default concurrent query level breakdown, which will include the // create_weight time/count queryNodeTime = createWeightTime; @@ -78,6 +78,21 @@ public Map toBreakdownMap() { minSliceNodeTime = 0L; avgSliceNodeTime = 0L; return buildDefaultQueryBreakdownMap(createWeightTime); + } else if (sliceCollectorsToLeaves.isEmpty()) { + // This will happen when each slice executes search leaf for its leaves and query is rewritten for the leaf being searched. It + // creates a new weight and breakdown map for each rewritten query. This new breakdown map captures the timing information for + // the new rewritten query. The sliceCollectorsToLeaves is empty because this breakdown for rewritten query gets created later + // in search leaf path which doesn't have collector. 
Also, this is not needed since this breakdown is per leaf and there is no + concurrency involved. An empty sliceCollectorsToLeaves could also happen in the case of early termination. + AbstractProfileBreakdown<QueryTimingType> breakdown = contexts.values().iterator().next(); + queryNodeTime = breakdown.toNodeTime() + createWeightTime; + maxSliceNodeTime = 0L; + minSliceNodeTime = 0L; + avgSliceNodeTime = 0L; + Map<String, Long> queryBreakdownMap = new HashMap<>(breakdown.toBreakdownMap()); + queryBreakdownMap.put(QueryTimingType.CREATE_WEIGHT.toString(), createWeightTime); + queryBreakdownMap.put(QueryTimingType.CREATE_WEIGHT + TIMING_TYPE_COUNT_SUFFIX, 1L); + return queryBreakdownMap; } // first create the slice level breakdowns @@ -191,10 +206,12 @@ Map<Collector, Map<String, Long>> buildSliceLevelBreakdown() { } // compute sliceMaxEndTime as max of sliceEndTime across all timing types sliceMaxEndTime = Math.max(sliceMaxEndTime, currentSliceBreakdown.getOrDefault(timingTypeSliceEndTimeKey, Long.MIN_VALUE)); - sliceMinStartTime = Math.min( - sliceMinStartTime, - currentSliceBreakdown.getOrDefault(timingTypeSliceStartTimeKey, Long.MAX_VALUE) - ); + long currentSliceStartTime = currentSliceBreakdown.getOrDefault(timingTypeSliceStartTimeKey, Long.MAX_VALUE); + if (currentSliceStartTime == 0L) { + // The timer for the current timing type never started, so skip it when computing the minimum start time + continue; + } + sliceMinStartTime = Math.min(sliceMinStartTime, currentSliceStartTime); // compute total time for each timing type at slice level using sliceEndTime and sliceStartTime currentSliceBreakdown.put( timingType.toString(), diff --git a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java new file mode 100644 index 0000000000000..42bf23bb13fbe --- /dev/null +++ b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.profile.query; + +import org.apache.lucene.search.Query; +import org.opensearch.search.profile.ContextualProfileBreakdown; +import org.opensearch.search.profile.ProfileResult; +import org.opensearch.search.profile.Timer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This class acts as thread-local storage for profiling a query with concurrent execution + * + * @opensearch.internal + */ +public final class ConcurrentQueryProfiler extends QueryProfiler { + + private final Map<Long, ConcurrentQueryProfileTree> threadToProfileTree; + // The LinkedList does not need to be thread safe, as the map associates thread IDs with LinkedList, and only + // one thread will access the LinkedList at a time.
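+ // For example, startRewriteTime() below only appends to the calling thread's own list via
+ // threadToRewriteTimers.computeIfAbsent(getCurrentThreadId(), k -> new LinkedList<>()).add(rewriteTimer),
+ // so each LinkedList has a single writer: the thread whose id keys it.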
+ private final Map<Long, LinkedList<Timer>> threadToRewriteTimers; + + public ConcurrentQueryProfiler(AbstractQueryProfileTree profileTree) { + super(profileTree); + long threadId = getCurrentThreadId(); + // We utilize LinkedHashMap to preserve the insertion order of the profiled queries + threadToProfileTree = Collections.synchronizedMap(new LinkedHashMap<>()); + threadToProfileTree.put(threadId, (ConcurrentQueryProfileTree) profileTree); + threadToRewriteTimers = new ConcurrentHashMap<>(); + threadToRewriteTimers.put(threadId, new LinkedList<>()); + } + + @Override + public ContextualProfileBreakdown<QueryTimingType> getQueryBreakdown(Query query) { + ConcurrentQueryProfileTree profileTree = threadToProfileTree.computeIfAbsent( + getCurrentThreadId(), + k -> new ConcurrentQueryProfileTree() + ); + return profileTree.getProfileBreakdown(query); + } + + /** + * Removes the last (i.e. most recent) element from the ConcurrentQueryProfileTree stack. + */ + @Override + public void pollLastElement() { + ConcurrentQueryProfileTree concurrentProfileTree = threadToProfileTree.get(getCurrentThreadId()); + if (concurrentProfileTree != null) { + concurrentProfileTree.pollLast(); + } + } + + /** + * @return a hierarchical representation of the profiled tree + */ + @Override + public List<ProfileResult> getTree() { + List<ProfileResult> profileResults = new ArrayList<>(); + for (Map.Entry<Long, ConcurrentQueryProfileTree> profile : threadToProfileTree.entrySet()) { + profileResults.addAll(profile.getValue().getTree()); + } + return profileResults; + } + + /** + * Begin timing the rewrite phase of a request + */ + @Override + public void startRewriteTime() { + Timer rewriteTimer = new Timer(); + threadToRewriteTimers.computeIfAbsent(getCurrentThreadId(), k -> new LinkedList<>()).add(rewriteTimer); + rewriteTimer.start(); + } + + /** + * Stop recording the current rewrite timer + */ + public void stopAndAddRewriteTime() { + Timer rewriteTimer = threadToRewriteTimers.get(getCurrentThreadId()).getLast(); + rewriteTimer.stop(); + } + + /** + * @return total time taken to rewrite all queries in this concurrent query profiler + */ + @Override + public long getRewriteTime() { + long totalRewriteTime = 0L; + List<Timer> rewriteTimers = new LinkedList<>(); + threadToRewriteTimers.values().forEach(rewriteTimers::addAll); + LinkedList<long[]> mergedIntervals = mergeRewriteTimeIntervals(rewriteTimers); + for (long[] interval : mergedIntervals) { + totalRewriteTime += interval[1] - interval[0]; + } + return totalRewriteTime; + } + + // package private for unit testing + LinkedList<long[]> mergeRewriteTimeIntervals(List<Timer> timers) { + LinkedList<long[]> mergedIntervals = new LinkedList<>(); + timers.sort(Comparator.comparingLong(Timer::getEarliestTimerStartTime)); + for (Timer timer : timers) { + long startTime = timer.getEarliestTimerStartTime(); + long endTime = startTime + timer.getApproximateTiming(); + if (mergedIntervals.isEmpty() || mergedIntervals.getLast()[1] < startTime) { + long[] interval = new long[2]; + interval[0] = startTime; + interval[1] = endTime; + mergedIntervals.add(interval); + } else { + mergedIntervals.getLast()[1] = Math.max(mergedIntervals.getLast()[1], endTime); + } + } + return mergedIntervals; + } + + private long getCurrentThreadId() { + return Thread.currentThread().getId(); + } +} diff --git a/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java b/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java index a80ce1c658081..332c4b3551450 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java +++
b/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java @@ -51,15 +51,15 @@ * * @opensearch.internal */ -public final class QueryProfiler extends AbstractProfiler<ContextualProfileBreakdown<QueryTimingType>, Query> { +public class QueryProfiler extends AbstractProfiler<ContextualProfileBreakdown<QueryTimingType>, Query> { /** * The root Collector used in the search */ private InternalProfileComponent collector; - public QueryProfiler(boolean concurrent) { - super(concurrent ? new ConcurrentQueryProfileTree() : new InternalQueryProfileTree()); + public QueryProfiler(AbstractQueryProfileTree profileTree) { + super(profileTree); } /** Set the collector that is associated with this profiler. */ @@ -81,14 +81,14 @@ public void startRewriteTime() { /** * Stop recording the current rewrite and add its time to the total tally, returning the * cumulative time so far. - * - * @return cumulative rewrite time */ - public long stopAndAddRewriteTime() { - return ((AbstractQueryProfileTree) profileTree).stopAndAddRewriteTime(); + public void stopAndAddRewriteTime() { + ((AbstractQueryProfileTree) profileTree).stopAndAddRewriteTime(); } /** + * The rewriting process is complex and hard to display because queries can undergo significant changes. + * Instead of showing intermediate results, we display the cumulative time for the non-concurrent search case. * @return total time taken to rewrite all queries in this profile */ public long getRewriteTime() { diff --git a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java index f29ba3b0cea07..db14eb90ef839 100644 --- a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java +++ b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java @@ -333,6 +333,58 @@ public void testBreakDownMapWithMultipleSlicesAndOneSliceWithNoLeafContext() thr directory.close(); } + public void testOneLeafContextWithEmptySliceCollectorsToLeaves() throws Exception { + final DirectoryReader directoryReader = getDirectoryReader(1); + final Directory directory = directoryReader.directory(); + final long createWeightEarliestStartTime = createWeightTimer.getEarliestTimerStartTime(); + final long createWeightEndTime = createWeightEarliestStartTime + createWeightTimer.getApproximateTiming(); + final Map<String, Long> leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEndTime + 10, 10, 1); + final AbstractProfileBreakdown<QueryTimingType> leafProfileBreakdown_1 = new TestQueryProfileBreakdown( + QueryTimingType.class, + leafProfileBreakdownMap_1 + ); + testQueryProfileBreakdown.getContexts().put(directoryReader.leaves().get(0), leafProfileBreakdown_1); + final Map<String, Long> queryBreakDownMap = testQueryProfileBreakdown.toBreakdownMap(); + assertFalse(queryBreakDownMap == null || queryBreakDownMap.isEmpty()); + assertEquals(26, queryBreakDownMap.size()); + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + String timingTypeKey = queryTimingType.toString(); + String timingTypeCountKey = queryTimingType + TIMING_TYPE_COUNT_SUFFIX; + + if (queryTimingType.equals(QueryTimingType.CREATE_WEIGHT)) { + final long createWeightTime = queryBreakDownMap.get(timingTypeKey); + assertEquals(createWeightTimer.getApproximateTiming(), createWeightTime); + assertEquals(1, (long) queryBreakDownMap.get(timingTypeCountKey)); + // verify there is no min/max/avg for weight type stats + assertFalse(
queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey) + ); + continue; + } + assertNotNull(queryBreakDownMap.get(timingTypeKey)); + assertNotNull(queryBreakDownMap.get(timingTypeCountKey)); + // verify there is no min/max/avg for current breakdown type stats + assertFalse( + queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey) + ); + } + assertEquals(0, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getAvgSliceNodeTime()); + directoryReader.close(); + directory.close(); + } + private Map getLeafBreakdownMap(long startTime, long timeTaken, long count) { Map leafBreakDownMap = new HashMap<>(); for (QueryTimingType timingType : QueryTimingType.values()) { diff --git a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java new file mode 100644 index 0000000000000..736bbcdd9e8dd --- /dev/null +++ b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.profile.query; + +import org.opensearch.search.profile.Timer; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.LinkedList; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class ConcurrentQueryProfilerTests extends OpenSearchTestCase { + + public void testMergeRewriteTimeIntervals() { + ConcurrentQueryProfiler profiler = new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()); + List<Timer> timers = new LinkedList<>(); + timers.add(new Timer(217134L, 1L, 1L, 0L, 553074511206907L)); + timers.add(new Timer(228954L, 1L, 1L, 0L, 553074509287335L)); + timers.add(new Timer(228954L, 1L, 1L, 0L, 553074509287336L)); + LinkedList<long[]> mergedIntervals = profiler.mergeRewriteTimeIntervals(timers); + assertThat(mergedIntervals.size(), equalTo(2)); + long[] interval = mergedIntervals.get(0); + assertThat(interval[0], equalTo(553074509287335L)); + assertThat(interval[1], equalTo(553074509516290L)); + interval = mergedIntervals.get(1); + assertThat(interval[0], equalTo(553074511206907L)); + assertThat(interval[1], equalTo(553074511424041L)); + } +} diff --git a/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java b/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java index 64a440b85eb10..481a224f2ff0e 100644 --- a/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java +++ b/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java @@ -161,7 +161,9 @@ public void tearDown() throws Exception { } public void testBasic() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new TermQuery(new Term("foo", "bar")); searcher.search(query, 1); @@ -228,7 +230,9 @@ public void testBasic() throws IOException { } public void testNoScoring() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new TermQuery(new Term("foo", "bar")); searcher.search(query, 1, Sort.INDEXORDER); // scores are not needed @@ -295,7 +299,9 @@ public void testNoScoring() throws IOException { } public void testUseIndexStats() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new TermQuery(new Term("foo", "bar")); searcher.count(query); // will use index stats @@ -309,7 +315,9 @@ public void testUseIndexStats() throws IOException { } public void testApproximations() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ?
new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new RandomApproximationQuery(new TermQuery(new Term("foo", "bar")), random()); searcher.count(query); From 41a12e28fff58c7262bfeb725c8a31e5ffa266f3 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Thu, 19 Oct 2023 20:33:32 -0700 Subject: [PATCH 08/26] Make RemoteStoreReplicationSource#getSegmentFiles asynchronous (#10725) * Make RemoteStoreReplicationSource#getSegmentFiles asynchronous Also make the remote store download process cancellable in case the replication event is canceled. Signed-off-by: Andrew Ross * Add ITs ensuring segRep targets are cleaned up on cancellation during metadata and segment fetch steps. Signed-off-by: Marc Handalian * Wrap metadata fetch in cancellableThreads.executeIO Signed-off-by: Marc Handalian * self review Signed-off-by: Marc Handalian * spotless Signed-off-by: Marc Handalian * Add missing node settings when bootstrapping nodes in tests. Signed-off-by: Marc Handalian --------- Signed-off-by: Andrew Ross Signed-off-by: Marc Handalian Co-authored-by: Andrew Ross --- ...emoteStoreMockRepositoryIntegTestCase.java | 5 + ...plicationUsingRemoteStoreDisruptionIT.java | 133 ++++++++++++++++++ .../store/RemoteStoreFileDownloader.java | 72 ++++++---- .../RemoteStoreReplicationSource.java | 68 +++++---- .../replication/common/ReplicationTarget.java | 11 +- .../store/RemoteStoreFileDownloaderTests.java | 125 ++++++++++++++-- .../AbstractSnapshotIntegTestCase.java | 6 + .../snapshots/mockstore/MockRepository.java | 15 ++ 8 files changed, 373 insertions(+), 62 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java index 2053800504c89..8166c0008ed83 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java @@ -114,6 +114,10 @@ protected void cleanupRepo() { } protected String setup(Path repoLocation, double ioFailureRate, String skipExceptionBlobList, long maxFailure) { + return setup(repoLocation, ioFailureRate, skipExceptionBlobList, maxFailure, 0); + } + + protected String setup(Path repoLocation, double ioFailureRate, String skipExceptionBlobList, long maxFailure, int replicaCount) { // The random_control_io_exception_rate setting ensures that 10-25% of all operations to remote store results in /// IOException. skip_exception_on_verification_file & skip_exception_on_list_blobs settings ensures that the // repository creation can happen without failure. 
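// Usage sketch (mirroring SegmentReplicationUsingRemoteStoreDisruptionIT later in this patch; the
// argument values are taken from that test): setup(randomRepoPath().toAbsolutePath(), 0d, "metadata", Long.MAX_VALUE, 1)
// starts a cluster-manager node, one data node for the primary, and one additional replica data node.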
@@ -128,6 +132,7 @@ protected String setup(Path repoLocation, double ioFailureRate, String skipExcep internalCluster().startClusterManagerOnlyNode(settings.build()); String dataNodeName = internalCluster().startDataOnlyNode(settings.build()); + internalCluster().startDataOnlyNodes(replicaCount, settings.build()); createIndex(INDEX_NAME); logger.info("--> Created index={}", INDEX_NAME); ensureYellowAndNoInitializingShards(INDEX_NAME); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java new file mode 100644 index 0000000000000..b7b3f1d14f422 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.SegmentReplicationState; +import org.opensearch.indices.replication.SegmentReplicationTarget; +import org.opensearch.indices.replication.SegmentReplicationTargetService; +import org.opensearch.indices.replication.common.ReplicationCollection; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Optional; +import java.util.Set; + +/** + * This class runs tests with remote store + segRep while blocking file downloads + */ +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SegmentReplicationUsingRemoteStoreDisruptionIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { + + @Override + public Settings indexSettings() { + return remoteStoreIndexSettings(1); + } + + @Override + protected boolean addMockInternalEngine() { + return false; + } + + public void testCancelReplicationWhileSyncingSegments() throws Exception { + Path location = randomRepoPath().toAbsolutePath(); + setup(location, 0d, "metadata", Long.MAX_VALUE, 1); + + final Set dataNodeNames = internalCluster().getDataNodeNames(); + final String replicaNode = getNode(dataNodeNames, false); + final String primaryNode = getNode(dataNodeNames, true); + + SegmentReplicationTargetService targetService = internalCluster().getInstance(SegmentReplicationTargetService.class, replicaNode); + ensureGreen(INDEX_NAME); + blockNodeOnAnySegmentFile(REPOSITORY_NAME, replicaNode); + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + indexSingleDoc(); + refresh(INDEX_NAME); + waitForBlock(replicaNode, REPOSITORY_NAME, TimeValue.timeValueSeconds(10)); + final SegmentReplicationState state = targetService.getOngoingEventSegmentReplicationState(indexShard.shardId()); + assertEquals(SegmentReplicationState.Stage.GET_FILES, state.getStage()); + ReplicationCollection.ReplicationRef segmentReplicationTargetReplicationRef = targetService.get( + state.getReplicationId() + ); + final SegmentReplicationTarget segmentReplicationTarget = 
segmentReplicationTargetReplicationRef.get(); + // close the target ref here otherwise it will hold a refcount + segmentReplicationTargetReplicationRef.close(); + assertNotNull(segmentReplicationTarget); + assertTrue(segmentReplicationTarget.refCount() > 0); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNode)); + assertBusy(() -> { + assertTrue(indexShard.routingEntry().primary()); + assertNull(targetService.getOngoingEventSegmentReplicationState(indexShard.shardId())); + assertEquals("Target should be closed", 0, segmentReplicationTarget.refCount()); + }); + unblockNode(REPOSITORY_NAME, replicaNode); + cleanupRepo(); + } + + public void testCancelReplicationWhileFetchingMetadata() throws Exception { + Path location = randomRepoPath().toAbsolutePath(); + setup(location, 0d, "metadata", Long.MAX_VALUE, 1); + + final Set dataNodeNames = internalCluster().getDataNodeNames(); + final String replicaNode = getNode(dataNodeNames, false); + final String primaryNode = getNode(dataNodeNames, true); + + SegmentReplicationTargetService targetService = internalCluster().getInstance(SegmentReplicationTargetService.class, replicaNode); + ensureGreen(INDEX_NAME); + blockNodeOnAnyFiles(REPOSITORY_NAME, replicaNode); + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + indexSingleDoc(); + refresh(INDEX_NAME); + waitForBlock(replicaNode, REPOSITORY_NAME, TimeValue.timeValueSeconds(10)); + final SegmentReplicationState state = targetService.getOngoingEventSegmentReplicationState(indexShard.shardId()); + assertEquals(SegmentReplicationState.Stage.GET_CHECKPOINT_INFO, state.getStage()); + ReplicationCollection.ReplicationRef segmentReplicationTargetReplicationRef = targetService.get( + state.getReplicationId() + ); + final SegmentReplicationTarget segmentReplicationTarget = segmentReplicationTargetReplicationRef.get(); + // close the target ref here otherwise it will hold a refcount + segmentReplicationTargetReplicationRef.close(); + assertNotNull(segmentReplicationTarget); + assertTrue(segmentReplicationTarget.refCount() > 0); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNode)); + assertBusy(() -> { + assertTrue(indexShard.routingEntry().primary()); + assertNull(targetService.getOngoingEventSegmentReplicationState(indexShard.shardId())); + assertEquals("Target should be closed", 0, segmentReplicationTarget.refCount()); + }); + unblockNode(REPOSITORY_NAME, replicaNode); + cleanupRepo(); + } + + private String getNode(Set dataNodeNames, boolean primary) { + assertEquals(2, dataNodeNames.size()); + for (String name : dataNodeNames) { + final IndexShard indexShard = getIndexShard(name, INDEX_NAME); + if (indexShard.routingEntry().primary() == primary) { + return name; + } + } + return null; + } + + private IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } +} diff --git a/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java b/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java index 4fc721f2b96b5..727c57afd289b 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java +++ 
b/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java @@ -16,7 +16,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.logging.Loggers; -import org.opensearch.common.util.concurrent.UncategorizedExecutionException; +import org.opensearch.common.util.CancellableThreads; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; import org.opensearch.indices.recovery.RecoverySettings; @@ -51,9 +51,16 @@ public RemoteStoreFileDownloader(ShardId shardId, ThreadPool threadPool, Recover * @param source The remote directory to copy segment files from * @param destination The local directory to copy segment files to * @param toDownloadSegments The list of segment files to download + * @param listener Callback listener to be notified upon completion */ - public void download(Directory source, Directory destination, Collection toDownloadSegments) throws IOException { - downloadInternal(source, destination, null, toDownloadSegments, () -> {}); + public void downloadAsync( + CancellableThreads cancellableThreads, + Directory source, + Directory destination, + Collection toDownloadSegments, + ActionListener listener + ) { + downloadInternal(cancellableThreads, source, destination, null, toDownloadSegments, () -> {}, listener); } /** @@ -74,17 +81,37 @@ public void download( Directory secondDestination, Collection toDownloadSegments, Runnable onFileCompletion - ) throws IOException { - downloadInternal(source, destination, secondDestination, toDownloadSegments, onFileCompletion); + ) throws InterruptedException, IOException { + final CancellableThreads cancellableThreads = new CancellableThreads(); + final PlainActionFuture listener = PlainActionFuture.newFuture(); + downloadInternal(cancellableThreads, source, destination, secondDestination, toDownloadSegments, onFileCompletion, listener); + try { + listener.get(); + } catch (ExecutionException e) { + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException) e.getCause(); + } else if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new RuntimeException(e); + } catch (InterruptedException e) { + // If the blocking call on the PlainActionFuture itself is interrupted, then we must + // cancel the asynchronous work we were waiting on + cancellableThreads.cancel(e.getMessage()); + Thread.currentThread().interrupt(); + throw e; + } } private void downloadInternal( + CancellableThreads cancellableThreads, Directory source, Directory destination, @Nullable Directory secondDestination, Collection toDownloadSegments, - Runnable onFileCompletion - ) throws IOException { + Runnable onFileCompletion, + ActionListener listener + ) { final Queue queue = new ConcurrentLinkedQueue<>(toDownloadSegments); // Choose the minimum of: // - number of files to download @@ -95,25 +122,14 @@ private void downloadInternal( Math.min(threadPool.info(ThreadPool.Names.REMOTE_RECOVERY).getMax(), recoverySettings.getMaxConcurrentRemoteStoreStreams()) ); logger.trace("Starting download of {} files with {} threads", queue.size(), threads); - final PlainActionFuture> listener = PlainActionFuture.newFuture(); - final ActionListener allFilesListener = new GroupedActionListener<>(listener, threads); + final ActionListener allFilesListener = new GroupedActionListener<>(ActionListener.map(listener, r -> null), threads); for (int i = 0; i < threads; i++) { - copyOneFile(source, destination, 
secondDestination, queue, onFileCompletion, allFilesListener); - } - try { - listener.actionGet(); - } catch (UncategorizedExecutionException e) { - // Any IOException will be double-wrapped so dig it out and throw it - if (e.getCause() instanceof ExecutionException) { - if (e.getCause().getCause() instanceof IOException) { - throw (IOException) e.getCause().getCause(); - } - } - throw e; + copyOneFile(cancellableThreads, source, destination, secondDestination, queue, onFileCompletion, allFilesListener); } } private void copyOneFile( + CancellableThreads cancellableThreads, Directory source, Directory destination, @Nullable Directory secondDestination, @@ -129,18 +145,20 @@ private void copyOneFile( threadPool.executor(ThreadPool.Names.REMOTE_RECOVERY).submit(() -> { logger.trace("Downloading file {}", file); try { - destination.copyFrom(source, file, file, IOContext.DEFAULT); - onFileCompletion.run(); - if (secondDestination != null) { - secondDestination.copyFrom(destination, file, file, IOContext.DEFAULT); - } + cancellableThreads.executeIO(() -> { + destination.copyFrom(source, file, file, IOContext.DEFAULT); + onFileCompletion.run(); + if (secondDestination != null) { + secondDestination.copyFrom(destination, file, file, IOContext.DEFAULT); + } + }); } catch (Exception e) { // Clear the queue to stop any future processing, report the failure, then return queue.clear(); listener.onFailure(e); return; } - copyOneFile(source, destination, secondDestination, queue, onFileCompletion, listener); + copyOneFile(cancellableThreads, source, destination, secondDestination, queue, onFileCompletion, listener); }); } } diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index 12eabf1e6554f..b06b3e0497cf7 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -15,6 +15,7 @@ import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.util.Version; import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.util.CancellableThreads; import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardState; @@ -24,11 +25,14 @@ import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadata; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -43,6 +47,7 @@ public class RemoteStoreReplicationSource implements SegmentReplicationSource { private final IndexShard indexShard; private final RemoteSegmentStoreDirectory remoteDirectory; + private final CancellableThreads cancellableThreads = new CancellableThreads(); public RemoteStoreReplicationSource(IndexShard indexShard) { this.indexShard = indexShard; @@ -61,7 +66,7 @@ public void getCheckpointMetadata( // TODO: Need to figure out a way to pass this information for segment metadata via remote store. 
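// (Illustrative note: the blocking remote reads in this class are wrapped in
// cancellableThreads.executeIO(...) -- see getRemoteSegmentMetadata() below, which does
// cancellableThreads.executeIO(() -> mdFile.set(remoteDirectory.init())) -- so a cancelled
// replication event can interrupt them.)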
try (final GatedCloseable segmentInfosSnapshot = indexShard.getSegmentInfosSnapshot()) { final Version version = segmentInfosSnapshot.get().getCommitLuceneVersion(); - RemoteSegmentMetadata mdFile = remoteDirectory.init(); + final RemoteSegmentMetadata mdFile = getRemoteSegmentMetadata(); // During initial recovery flow, the remote store might not // have metadata as primary hasn't uploaded anything yet. if (mdFile == null && indexShard.state().equals(IndexShardState.STARTED) == false) { @@ -106,39 +111,50 @@ public void getSegmentFiles( } logger.debug("Downloading segment files from remote store {}", filesToFetch); - RemoteSegmentMetadata remoteSegmentMetadata = remoteDirectory.readLatestMetadataFile(); - Collection directoryFiles = List.of(indexShard.store().directory().listAll()); - if (remoteSegmentMetadata != null) { - try { - indexShard.store().incRef(); - indexShard.remoteStore().incRef(); - final Directory storeDirectory = indexShard.store().directory(); - final List toDownloadSegmentNames = new ArrayList<>(); - for (StoreFileMetadata fileMetadata : filesToFetch) { - String file = fileMetadata.name(); - assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; - toDownloadSegmentNames.add(file); - } - indexShard.getFileDownloader() - .download( - remoteDirectory, - new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), - toDownloadSegmentNames - ); - logger.debug("Downloaded segment files from remote store {}", filesToFetch); - } finally { - indexShard.store().decRef(); - indexShard.remoteStore().decRef(); + if (remoteMetadataExists()) { + final Directory storeDirectory = indexShard.store().directory(); + final Collection directoryFiles = List.of(storeDirectory.listAll()); + final List toDownloadSegmentNames = new ArrayList<>(); + for (StoreFileMetadata fileMetadata : filesToFetch) { + String file = fileMetadata.name(); + assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; + toDownloadSegmentNames.add(file); } + indexShard.getFileDownloader() + .downloadAsync( + cancellableThreads, + remoteDirectory, + new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), + toDownloadSegmentNames, + ActionListener.map(listener, r -> new GetSegmentFilesResponse(filesToFetch)) + ); + } else { + listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); } - listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); - } catch (Exception e) { + } catch (IOException | RuntimeException e) { listener.onFailure(e); } } + @Override + public void cancel() { + this.cancellableThreads.cancel("Canceled by target"); + } + @Override public String getDescription() { return "RemoteStoreReplicationSource"; } + + private boolean remoteMetadataExists() throws IOException { + final AtomicBoolean metadataExists = new AtomicBoolean(false); + cancellableThreads.executeIO(() -> metadataExists.set(remoteDirectory.readLatestMetadataFile() != null)); + return metadataExists.get(); + } + + private RemoteSegmentMetadata getRemoteSegmentMetadata() throws IOException { + AtomicReference mdFile = new AtomicReference<>(); + cancellableThreads.executeIO(() -> mdFile.set(remoteDirectory.init())); + return mdFile.get(); + } } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index ec6b4d06b32c3..aac59df4f6573 100644 --- 
a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -91,6 +91,9 @@ public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIn // make sure the store is not released until we are done. this.cancellableThreads = new CancellableThreads(); store.incRef(); + if (indexShard.indexSettings().isRemoteStoreEnabled()) { + indexShard.remoteStore().incRef(); + } } public long getId() { @@ -278,6 +281,12 @@ public abstract void writeFileChunk( ); protected void closeInternal() { - store.decRef(); + try { + store.decRef(); + } finally { + if (indexShard.indexSettings().isRemoteStoreEnabled()) { + indexShard.remoteStore().decRef(); + } + } } } diff --git a/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java b/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java index 588d9e8bb13a2..6d8b3fe4d69fb 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java @@ -9,12 +9,18 @@ package org.opensearch.index.store; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.NIOFSDirectory; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.support.PlainActionFuture; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.CancellableThreads; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.test.OpenSearchTestCase; @@ -31,8 +37,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; public class RemoteStoreFileDownloaderTests extends OpenSearchTestCase { @@ -76,31 +84,132 @@ public void stopThreadPool() throws Exception { } public void testDownload() throws IOException { - fileDownloader.download(source, destination, files.keySet()); + final PlainActionFuture l = new PlainActionFuture<>(); + fileDownloader.downloadAsync(new CancellableThreads(), source, destination, files.keySet(), l); + l.actionGet(); assertContent(files, destination); } - public void testDownloadWithSecondDestination() throws IOException { + public void testDownloadWithSecondDestination() throws IOException, InterruptedException { fileDownloader.download(source, destination, secondDestination, files.keySet(), () -> {}); assertContent(files, destination); assertContent(files, secondDestination); } - public void testDownloadWithFileCompletionHandler() throws IOException { + public void testDownloadWithFileCompletionHandler() throws IOException, InterruptedException { final AtomicInteger counter = new AtomicInteger(0); fileDownloader.download(source, destination, null, files.keySet(), counter::incrementAndGet); assertContent(files, destination); assertEquals(files.size(), counter.get()); } - public void testDownloadNonExistentFile() { - 
assertThrows(NoSuchFileException.class, () -> fileDownloader.download(source, destination, Set.of("not real"))); + public void testDownloadNonExistentFile() throws InterruptedException { + final CountDownLatch latch = new CountDownLatch(1); + fileDownloader.downloadAsync(new CancellableThreads(), source, destination, Set.of("not real"), new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + assertEquals(NoSuchFileException.class, e.getClass()); + latch.countDown(); + } + }); + assertTrue(latch.await(10, TimeUnit.SECONDS)); } - public void testDownloadExtraNonExistentFile() { - List filesWithExtra = new ArrayList<>(files.keySet()); + public void testDownloadExtraNonExistentFile() throws InterruptedException { + final CountDownLatch latch = new CountDownLatch(1); + final List filesWithExtra = new ArrayList<>(files.keySet()); filesWithExtra.add("not real"); - assertThrows(NoSuchFileException.class, () -> fileDownloader.download(source, destination, filesWithExtra)); + fileDownloader.downloadAsync(new CancellableThreads(), source, destination, filesWithExtra, new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + assertEquals(NoSuchFileException.class, e.getClass()); + latch.countDown(); + } + }); + assertTrue(latch.await(10, TimeUnit.SECONDS)); + } + + public void testCancellable() { + final CancellableThreads cancellableThreads = new CancellableThreads(); + final PlainActionFuture blockingListener = new PlainActionFuture<>(); + final Directory blockingDestination = new FilterDirectory(destination) { + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) { + try { + Thread.sleep(60_000); // Will be interrupted + fail("Expected to be interrupted"); + } catch (InterruptedException e) { + throw new RuntimeException("Failed due to interrupt", e); + } + } + }; + fileDownloader.downloadAsync(cancellableThreads, source, blockingDestination, files.keySet(), blockingListener); + assertThrows( + "Expected to timeout due to blocking directory", + OpenSearchTimeoutException.class, + () -> blockingListener.actionGet(TimeValue.timeValueMillis(500)) + ); + cancellableThreads.cancel("test"); + assertThrows( + "Expected to complete with cancellation failure", + CancellableThreads.ExecutionCancelledException.class, + blockingListener::actionGet + ); + } + + public void testBlockingCallCanBeInterrupted() throws Exception { + final Directory blockingDestination = new FilterDirectory(destination) { + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) { + try { + Thread.sleep(60_000); // Will be interrupted + fail("Expected to be interrupted"); + } catch (InterruptedException e) { + throw new RuntimeException("Failed due to interrupt", e); + } + } + }; + final AtomicReference capturedException = new AtomicReference<>(); + final Thread thread = new Thread(() -> { + try { + fileDownloader.download(source, blockingDestination, null, files.keySet(), () -> {}); + } catch (Exception e) { + capturedException.set(e); + } + }); + thread.start(); + thread.interrupt(); + thread.join(); + assertEquals(InterruptedException.class, capturedException.get().getClass()); + } + + public void testIOException() throws IOException, InterruptedException { + final Directory failureDirectory = new FilterDirectory(destination) { + @Override + public void copyFrom(Directory from, String src, String dest, 
IOContext context) throws IOException { + throw new IOException("test"); + } + }; + assertThrows(IOException.class, () -> fileDownloader.download(source, failureDirectory, null, files.keySet(), () -> {})); + + final CountDownLatch latch = new CountDownLatch(1); + fileDownloader.downloadAsync(new CancellableThreads(), source, failureDirectory, files.keySet(), new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + assertEquals(IOException.class, e.getClass()); + latch.countDown(); + } + }); + assertTrue(latch.await(10, TimeUnit.SECONDS)); } private static void assertContent(Map expected, Directory destination) throws IOException { diff --git a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java index 1bb1e44a8a600..0ee889af5ce1a 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -331,6 +331,12 @@ public static void blockNodeOnAnyFiles(String repository, String nodeName) { ); } + public static void blockNodeOnAnySegmentFile(String repository, String nodeName) { + ((MockRepository) internalCluster().getInstance(RepositoriesService.class, nodeName).repository(repository)).blockOnSegmentFiles( + true + ); + } + public static void blockDataNode(String repository, String nodeName) { ((MockRepository) internalCluster().getInstance(RepositoriesService.class, nodeName).repository(repository)).blockOnDataFiles(true); } diff --git a/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java index 7db71c4be0968..72c4ba44d0a31 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java @@ -139,6 +139,8 @@ public long getFailureCount() { private volatile boolean blockOnDataFiles; + private volatile boolean blockOnSegmentFiles; + private volatile boolean blockOnDeleteIndexN; /** @@ -190,6 +192,7 @@ public MockRepository( maximumNumberOfFailures = metadata.settings().getAsLong("max_failure_number", 100L); blockOnAnyFiles = metadata.settings().getAsBoolean("block_on_control", false); blockOnDataFiles = metadata.settings().getAsBoolean("block_on_data", false); + blockOnSegmentFiles = metadata.settings().getAsBoolean("block_on_segment", false); blockAndFailOnWriteSnapFile = metadata.settings().getAsBoolean("block_on_snap", false); randomPrefix = metadata.settings().get("random", "default"); waitAfterUnblock = metadata.settings().getAsLong("wait_after_unblock", 0L); @@ -237,6 +240,7 @@ public synchronized void unblock() { blocked = false; // Clean blocking flags, so we wouldn't try to block again blockOnDataFiles = false; + blockOnSegmentFiles = false; blockOnAnyFiles = false; blockAndFailOnWriteIndexFile = false; blockOnWriteIndexFile = false; @@ -259,6 +263,14 @@ public void setBlockOnAnyFiles(boolean blocked) { blockOnAnyFiles = blocked; } + public void blockOnSegmentFiles(boolean blocked) { + blockOnSegmentFiles = blocked; + } + + public void setBlockOnSegmentFiles(boolean blocked) { + blockOnSegmentFiles = blocked; + } + public void setBlockAndFailOnWriteSnapFiles(boolean blocked) { blockAndFailOnWriteSnapFile = blocked; } 
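A usage sketch of these blocking hooks, as the disruption test added earlier in this patch wires them together (REPOSITORY_NAME, INDEX_NAME, and replicaNode are names from that test):

    blockNodeOnAnySegmentFile(REPOSITORY_NAME, replicaNode); // matches ".si__" segment blobs, see maybeIOExceptionOrBlock
    indexSingleDoc();
    refresh(INDEX_NAME);
    waitForBlock(replicaNode, REPOSITORY_NAME, TimeValue.timeValueSeconds(10));
    // ... assert the ongoing SegmentReplicationTarget state, then release:
    unblockNode(REPOSITORY_NAME, replicaNode);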
@@ -306,6 +318,7 @@ private synchronized boolean blockExecution() { boolean wasBlocked = false; try { while (blockOnDataFiles + || blockOnSegmentFiles || blockOnAnyFiles || blockAndFailOnWriteIndexFile || blockOnWriteIndexFile @@ -407,6 +420,8 @@ private void maybeIOExceptionOrBlock(String blobName) throws IOException { blockExecutionAndMaybeWait(blobName); } else if (blobName.startsWith("snap-") && blockAndFailOnWriteSnapFile) { blockExecutionAndFail(blobName); + } else if (blockOnSegmentFiles && blobName.contains(".si__")) { + blockExecutionAndMaybeWait(blobName); } } } From c400d84f0e884217454ddfcc1503d02e0b280fa9 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Fri, 20 Oct 2023 17:39:58 +0530 Subject: [PATCH 09/26] [Remote State] fix lock release before deletion is completed (#10611) * fix lock release before deletion is completed Signed-off-by: bansvaru --- .../remote/RemoteClusterStateService.java | 8 +++-- .../RemoteClusterStateServiceTests.java | 34 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index b9d06c8fbb1c1..96ce2fc779ea0 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -1072,7 +1072,8 @@ public void onFailure(Exception e) { * @param clusterUUID uuid of cluster state to refer to in remote * @param manifestsToRetain no of latest manifest files to keep in remote */ - private void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { + // package private for testing + void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { logger.info("Delete stale cluster metadata task is already in progress."); return; @@ -1109,8 +1110,9 @@ public void onFailure(Exception e) { } } ); - } finally { + } catch (Exception e) { deleteStaleMetadataRunning.set(false); + throw e; } } @@ -1190,7 +1192,7 @@ private void deleteStalePaths(String clusterName, String clusterUUID, List { String clusterName = clusterState.getClusterName().value(); - logger.info("Deleting stale cluster UUIDs data from remote [{}]", clusterName); + logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); Set allClustersUUIDsInRemote; try { allClustersUUIDsInRemote = new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 49b7f0ff8d1a9..433eac63e9580 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -62,6 +62,9 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import org.mockito.ArgumentCaptor; @@ -73,6 +76,7 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static 
org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.RETAINED_MANIFESTS; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -1004,6 +1008,36 @@ public void testFileNames() { assertThat(splittedName[3], is("P")); } + public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { + BlobContainer blobContainer = mock(BlobContainer.class); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + + CountDownLatch latch = new CountDownLatch(1); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> { + callCount.incrementAndGet(); + if (latch.await(5000, TimeUnit.SECONDS) == false) { + throw new Exception("Timed out waiting for delete task queuing to complete"); + } + return null; + }).when(blobContainer) + .listBlobsByPrefixInSortedOrder( + any(String.class), + any(int.class), + any(BlobContainer.BlobNameSortOrder.class), + any(ActionListener.class) + ); + + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + + latch.countDown(); + assertBusy(() -> assertEquals(1, callCount.get())); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); From 6641ef8a0a39d86aadd272936cae25a406eb3485 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Fri, 20 Oct 2023 18:06:40 +0530 Subject: [PATCH 10/26] Restore remote index shards with ExistingStoreRecoverySource after restore from remote state (#10665) * Restore remote index shards with ExistingStoreRecoverySource after restore from remote state Signed-off-by: bansvaru --- .../remotestore/BaseRemoteStoreRestoreIT.java | 5 +- .../RemoteStoreClusterStateRestoreIT.java | 28 ++- .../cluster/routing/IndexRoutingTable.java | 4 +- .../gateway/ClusterStateUpdaters.java | 17 +- .../recovery/RemoteStoreRestoreService.java | 30 +-- .../gateway/ClusterStateUpdatersTests.java | 237 ++---------------- 6 files changed, 53 insertions(+), 268 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index b8481610869e6..99c5d7fb2bae7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -46,7 +46,10 @@ protected Collection> nodePlugins() { } protected void restore(String... indices) { - boolean restoreAllShards = randomBoolean(); + restore(randomBoolean(), indices); + } + + protected void restore(boolean restoreAllShards, String... 
indices) { if (restoreAllShards) { assertAcked(client().admin().indices().prepareClose(indices)); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 3a3e293de9b13..c2cb7cc60f152 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -65,6 +65,13 @@ private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().startDataOnlyNodes(dataNodeCount); } + protected void verifyRedIndicesAndTriggerRestore(Map indexStats, String indexName, boolean indexMoreDocs) + throws Exception { + ensureRed(indexName); + restore(false, indexName); + verifyRestoredData(indexStats, indexName, indexMoreDocs); + } + public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -83,7 +90,7 @@ public void testFullClusterRestore() throws Exception { // Step - 3 Trigger full cluster restore and validate validateMetadata(List.of(INDEX_NAME)); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } public void testFullClusterRestoreMultipleIndices() throws Exception { @@ -112,8 +119,8 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { // Step - 3 Trigger full cluster restore validateMetadata(List.of(INDEX_NAME, secondIndexName)); - verifyRestoredData(indexStats, INDEX_NAME); - verifyRestoredData(indexStats2, secondIndexName, false); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + verifyRedIndicesAndTriggerRestore(indexStats2, secondIndexName, false); assertTrue(INDEX_READ_ONLY_SETTING.get(clusterService().state().metadata().index(secondIndexName).getSettings())); assertThrows(ClusterBlockException.class, () -> indexSingleDoc(secondIndexName)); // Test is complete @@ -181,7 +188,7 @@ public void testRemoteStateFullRestart() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. 
cluster uuid has changed"; validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } private void validateMetadata(List indexNames) { @@ -246,19 +253,18 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { // Step - 3 Trigger full cluster restore and validate // validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME, false); - - // validate global metadata restored - verifyRestoredRepositories(); - verifyRestoredIndexTemplate(); assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); - // Test is complete - // Remote the cluster read only block to ensure proper cleanup updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), false).build()); assertFalse(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); + + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + + // validate global metadata restored + verifyRestoredRepositories(); + verifyRestoredIndexTemplate(); } private void registerCustomRepository() { diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index b12698c8a320e..d77d44580798a 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -466,12 +466,12 @@ public Builder initializeAsRemoteStoreRestore( } for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) { ShardId shardId = new ShardId(index, shardNumber); - if (forceRecoverAllPrimaries == false && indexShardRoutingTableMap.containsKey(shardId) == false) { + if (indexShardRoutingTableMap.containsKey(shardId) == false) { throw new IllegalStateException("IndexShardRoutingTable is not present for shardId: " + shardId); } IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingTableMap.get(shardId); - if (forceRecoverAllPrimaries || indexShardRoutingTable == null || indexShardRoutingTable.primaryShard().unassigned()) { + if (forceRecoverAllPrimaries || indexShardRoutingTable.primaryShard().unassigned()) { // Primary shard to be recovered from remote store. indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, true, recoverySource, unassignedInfo)); // All the replica shards to be recovered from peer recovery. 
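Taken together, the IndexRoutingTable changes above reduce initializeAsRemoteStoreRestore to a simpler per-shard rule: the caller must now always supply a routing-table entry for every shard, and only primaries that are being force-recovered or are currently unassigned get re-seeded from the remote store. A rough sketch of the decision, simplified from the builder code above rather than a literal excerpt:

    // For each ShardId of the index being restored:
    IndexShardRoutingTable existing = indexShardRoutingTableMap.get(shardId);
    if (existing == null) {
        throw new IllegalStateException("IndexShardRoutingTable is not present for shardId: " + shardId);
    }
    if (forceRecoverAllPrimaries || existing.primaryShard().unassigned()) {
        // Primary recovers from the remote store; replicas are added unassigned and will peer-recover from it.
        indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, true, recoverySource, unassignedInfo));
    } else {
        // Otherwise the shard's existing (assigned) routing entries are carried over unchanged.
    }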
diff --git a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java index 4c562b348f141..1563ac84bdd1c 100644 --- a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java +++ b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java @@ -41,7 +41,6 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.common.settings.ClusterSettings; @@ -121,21 +120,7 @@ static ClusterState updateRoutingTable(final ClusterState state) { // initialize all index routing tables as empty final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(state.routingTable()); for (final IndexMetadata cursor : state.metadata().indices().values()) { - // Whether IndexMetadata is recovered from local disk or remote it doesn't matter to us at this point. - // We are only concerned about index data recovery here. Which is why we only check for remote store enabled and not for remote - // cluster state enabled. - if (cursor.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false) == false - || state.routingTable().hasIndex(cursor.getIndex()) == false - || state.routingTable() - .index(cursor.getIndex()) - .shardsMatchingPredicateCount( - shardRouting -> shardRouting.primary() - // We need to ensure atleast one of the primaries is being recovered from remote. - // This ensures we have gone through the RemoteStoreRestoreService and routing table is updated - && shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) == 0) { - routingTableBuilder.addAsRecovery(cursor); - } + routingTableBuilder.addAsRecovery(cursor); } // start with 0 based versions for routing table routingTableBuilder.version(0); diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index ac9cf35d1d8e5..6692d521b8f65 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -183,6 +183,7 @@ private RemoteRestoreResult executeRestore( final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); int totalShards = 0; + boolean metadataFromRemoteStore = false; ClusterState.Builder builder = ClusterState.builder(currentState); Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata()); ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()); @@ -190,7 +191,7 @@ private RemoteRestoreResult executeRestore( for (Map.Entry> indexMetadataEntry : indexMetadataMap.entrySet()) { String indexName = indexMetadataEntry.getKey(); IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); - boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); + metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); IndexMetadata updatedIndexMetadata = indexMetadata; if (metadataFromRemoteStore == false && restoreAllShards) { updatedIndexMetadata = IndexMetadata.builder(indexMetadata) @@ -204,27 +205,23 @@ private RemoteRestoreResult executeRestore( IndexId indexId = new IndexId(indexName, updatedIndexMetadata.getIndexUUID()); - Map 
indexShardRoutingTableMap = new HashMap<>(); if (metadataFromRemoteStore == false) { - indexShardRoutingTableMap = currentState.routingTable() + Map indexShardRoutingTableMap = currentState.routingTable() .index(indexName) .shards() .values() .stream() .collect(Collectors.toMap(IndexShardRoutingTable::shardId, Function.identity())); + + RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( + restoreUUID, + updatedIndexMetadata.getCreationVersion(), + indexId + ); + + rtBuilder.addAsRemoteStoreRestore(updatedIndexMetadata, recoverySource, indexShardRoutingTableMap, restoreAllShards); } - RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( - restoreUUID, - updatedIndexMetadata.getCreationVersion(), - indexId - ); - rtBuilder.addAsRemoteStoreRestore( - updatedIndexMetadata, - recoverySource, - indexShardRoutingTableMap, - restoreAllShards || metadataFromRemoteStore - ); blocks.updateBlocks(updatedIndexMetadata); mdBuilder.put(updatedIndexMetadata, true); indicesToBeRestored.add(indexName); @@ -239,7 +236,10 @@ private RemoteRestoreResult executeRestore( RoutingTable rt = rtBuilder.build(); ClusterState updatedState = builder.metadata(mdBuilder).blocks(blocks).routingTable(rt).build(); - return RemoteRestoreResult.build(restoreUUID, restoreInfo, allocationService.reroute(updatedState, "restored from remote store")); + if (metadataFromRemoteStore == false) { + updatedState = allocationService.reroute(updatedState, "restored from remote store"); + } + return RemoteRestoreResult.build(restoreUUID, restoreInfo, updatedState); } private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMetadata) { diff --git a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java index 9b3fd45245ef7..1c43bb565ef69 100644 --- a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java +++ b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java @@ -41,6 +41,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.UnassignedInfo; @@ -52,12 +53,14 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.repositories.IndexId; import org.opensearch.test.OpenSearchTestCase; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; @@ -275,7 +278,7 @@ public void testUpdateRoutingTable() { } } - public void testSkipRoutingTableUpdateWhenRemoteRecovery() { + public void testRoutingTableUpdateWhenRemoteStateRecovery() { final int numOfShards = randomIntBetween(1, 10); final IndexMetadata remoteMetadata = createIndexMetadata( @@ -286,7 +289,7 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { .build() ); - // Test remote index routing table is generated with ExistingStoreRecoverySource if no routing table is present + // Test remote index routing table is generated 
with ExistingStoreRecoverySource { final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) @@ -322,48 +325,14 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { } - // Test remote index routing table is overridden if recovery source is not RemoteStoreRecoverySource + // Test remote index routing table is overridden if recovery source is RemoteStoreRecoverySource { - IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsNew(remoteMetadata); final Index index = remoteMetadata.getIndex(); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .routingTable(new RoutingTable.Builder().add(remoteBuilderWithoutRemoteRecovery.build()).build()) - .build(); - assertTrue(initialState.routingTable().hasIndex(index)); - final ClusterState newState = updateRoutingTable(initialState); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - assertTrue(newState.routingTable().hasIndex(index)); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - - } - - // Test routing table update is skipped for a remote index - { + Map routingTableMap = new HashMap<>(); + for (int shardNumber = 0; shardNumber < remoteMetadata.getNumberOfShards(); shardNumber++) { + ShardId shardId = new ShardId(index, shardNumber); + routingTableMap.put(shardId, new IndexShardRoutingTable.Builder(new ShardId(remoteMetadata.getIndex(), 1)).build()); + } IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) .initializeAsRemoteStoreRestore( remoteMetadata, @@ -372,10 +341,9 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { remoteMetadata.getCreationVersion(), new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) ), - new HashMap<>(), + routingTableMap, true ); - final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) .metadata(Metadata.builder().put(remoteMetadata, false).build()) .routingTable(new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()).build()) @@ -387,205 +355,28 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> 
shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - - } - - // Test reset routing table for 2 indices - one remote and one non remote. - // Routing table for non remote index should be updated and remote index routing table should remain intact - { - final IndexMetadata nonRemoteMetadata = createIndexMetadata( - "test-nonremote", - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards).build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder nonRemoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(nonRemoteMetadata.getIndex()) - .initializeAsNew(nonRemoteMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(nonRemoteMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(nonRemoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newNonRemoteIndexRoutingTable = newState.routingTable().index(nonRemoteMetadata.getIndex()); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) ) ); assertEquals( numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> 
shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - } - - // Test reset routing table for 2 indices, both remote backed but only once index has RemoteStoreRecoverySource. - // Routing table for only remote index without RemoteStoreRecoverySource should be updated - { - final IndexMetadata remoteWithoutRemoteRecoveryMetadata = createIndexMetadata( - "test-remote-without-recovery", - Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) - .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) - .build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder( - remoteWithoutRemoteRecoveryMetadata.getIndex() - ).initializeAsNew(remoteWithoutRemoteRecoveryMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(remoteWithoutRemoteRecoveryMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(remoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newRemoteWithoutRemoteRecoveryIndexRoutingTable = newState.routingTable() - .index(remoteWithoutRemoteRecoveryMetadata.getIndex()); - assertEquals( - 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) ) ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - 
shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource ) ); assertEquals( numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource ) ); + } } From e12ab0f9ed5cfa32fbbcf654253180a72e50d0d7 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Fri, 20 Oct 2023 18:18:36 +0530 Subject: [PATCH 11/26] Fix flaky remote cluster state UT (#10780) Signed-off-by: Dhwanil Patel --- .../gateway/remote/RemoteClusterStateServiceTests.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 433eac63e9580..bcc58789dd6fd 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -458,7 +458,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { mockBlobStoreObjects(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) - .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata).version(randomNonNegativeLong())) .build(); final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() .codecVersion(2) @@ -479,6 +479,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { // new cluster state where only global metadata is different Metadata newMetadata = Metadata.builder(clusterState.metadata()) .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .version(randomNonNegativeLong()) .build(); ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); @@ -1281,7 +1282,7 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder().version(randomNonNegativeLong()).put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() ); } From 5093cc71476f5796f3b91fb18ff867cfe37588c0 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Fri, 20 Oct 2023 19:18:25 +0530 Subject: [PATCH 12/26] Fix spotless failure (#10782) Signed-off-by: Dhwanil Patel --- .../gateway/remote/RemoteClusterStateServiceTests.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 
bcc58789dd6fd..5202f31c514ed 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -1282,7 +1282,12 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().version(randomNonNegativeLong()).put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder() + .version(randomNonNegativeLong()) + .put(indexMetadata, true) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() ); } From e691df09c66dcc1693897543fd7633c4b208ce48 Mon Sep 17 00:00:00 2001 From: rayshrey <121871912+rayshrey@users.noreply.github.com> Date: Fri, 20 Oct 2023 20:36:20 +0530 Subject: [PATCH 13/26] Add tracing instrumentation for indexing paths (#10273) * Add tracing instrumentation for indexing paths Signed-off-by: Shreyansh Ray * Fix failing tests and review changes Signed-off-by: Shreyansh Ray * Fix test failures due to Span not being properly closed Signed-off-by: Shreyansh Ray * Changes to spans in primary and replica actions Signed-off-by: Shreyansh Ray * Review comments fixes and refactoring Signed-off-by: Shreyansh Ray * Precommit auto-changes Signed-off-by: Shreyansh Ray * Add refresh policy as attribute Signed-off-by: Shreyansh Ray * Fix changelog entry Signed-off-by: Shreyansh Ray * Instrument primary/replica write in TransportWriteAction instead of TransportShardBulkAction Signed-off-by: Shreyansh Ray * Modify SpanBuilder Signed-off-by: Shreyansh Ray * spotlessApply and precommit Signed-off-by: Shreyansh Ray * Change span names Signed-off-by: Shreyansh Ray * Pass Noop Tracer instead of injected tracer Signed-off-by: Shreyansh Ray * Reverting previous changes Signed-off-by: Shreyansh Ray * Remove tracer variable from TransportShardBulkAction Signed-off-by: Shreyansh Ray --------- Signed-off-by: Shreyansh Ray --- CHANGELOG.md | 1 + .../action/bulk/TransportBulkAction.java | 110 +++++++++++------- .../action/bulk/TransportShardBulkAction.java | 7 +- .../TransportResyncReplicationAction.java | 7 +- .../replication/TransportWriteAction.java | 24 +++- .../index/seqno/RetentionLeaseSyncAction.java | 7 +- .../telemetry/tracing/AttributeNames.java | 25 ++++ .../telemetry/tracing/SpanBuilder.java | 20 ++++ ...ActionIndicesThatCannotBeCreatedTests.java | 4 +- .../bulk/TransportBulkActionIngestTests.java | 4 +- .../action/bulk/TransportBulkActionTests.java | 3 +- .../bulk/TransportBulkActionTookTests.java | 3 +- .../bulk/TransportShardBulkActionTests.java | 13 ++- ...TransportResyncReplicationActionTests.java | 6 +- ...rtWriteActionForIndexingPressureTests.java | 3 +- .../TransportWriteActionTests.java | 6 +- .../seqno/RetentionLeaseSyncActionTests.java | 12 +- .../snapshots/SnapshotResiliencyTests.java | 9 +- 18 files changed, 192 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c52c43a35b8d..d7d492679c79d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) - [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) - Add telemetry tracer/metric enable flag 
and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) +- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java index 726ba7ba119af..4a9b07c12821d 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java @@ -85,6 +85,11 @@ import org.opensearch.ingest.IngestService; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportService; @@ -133,6 +138,7 @@ public class TransportBulkAction extends HandledTransportAction() { - @Override - public void onResponse(BulkShardResponse bulkShardResponse) { - for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { - // we may have no response if item failed - if (bulkItemResponse.getResponse() != null) { - bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); - } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(bulkItemResponse.getItemId(), bulkItemResponse); - } + final Span span = tracer.startSpan(SpanBuilder.from("bulkShardAction", nodeId, bulkShardRequest)); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + shardBulkAction.execute( + bulkShardRequest, + TraceableActionListener.create(ActionListener.runBefore(new ActionListener() { + @Override + public void onResponse(BulkShardResponse bulkShardResponse) { + for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { + // we may have no response if item failed + if (bulkItemResponse.getResponse() != null) { + bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + docStatusStats.inc(bulkItemResponse.status()); + responses.set(bulkItemResponse.getItemId(), bulkItemResponse); + } - @Override - public void onFailure(Exception e) { - // create failures for all relevant requests - for (BulkItemRequest request : requests) { - final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); - final DocWriteRequest docWriteRequest = request.request(); - final BulkItemResponse bulkItemResponse = new BulkItemResponse( - request.id(), - docWriteRequest.opType(), - new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) - ); + if (counter.decrementAndGet() == 0) { + finishHim(); + } + } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(request.id(), bulkItemResponse); - } + @Override + public void onFailure(Exception e) { + // create failures for all relevant requests + for (BulkItemRequest request : requests) { + final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); + final DocWriteRequest docWriteRequest = request.request(); + final BulkItemResponse bulkItemResponse = new BulkItemResponse( + request.id(), + 
docWriteRequest.opType(), + new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) + ); + + docStatusStats.inc(bulkItemResponse.status()); + responses.set(request.id(), bulkItemResponse); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + if (counter.decrementAndGet() == 0) { + finishHim(); + } + } - private void finishHim() { - indicesService.addDocStatusStats(docStatusStats); - listener.onResponse( - new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(startTimeNanos)) - ); - } - }, releasable::close)); + private void finishHim() { + indicesService.addDocStatusStats(docStatusStats); + listener.onResponse( + new BulkResponse( + responses.toArray(new BulkItemResponse[responses.length()]), + buildTookInMillis(startTimeNanos) + ) + ); + } + }, releasable::close), span, tracer) + ); + } catch (Exception e) { + span.setError(e); + span.endSpan(); + throw e; + } } bulkRequest = null; // allow memory for bulk request items to be reclaimed before all items have been completed } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index fddda0ef1f9a7..268a6ed6f85b8 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -99,6 +99,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportChannel; @@ -161,7 +162,8 @@ public TransportShardBulkAction( IndexingPressureService indexingPressureService, SegmentReplicationPressureService segmentReplicationPressureService, RemoteStorePressureService remoteStorePressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -177,7 +179,8 @@ public TransportShardBulkAction( EXECUTOR_NAME_FUNCTION, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); this.updateHelper = updateHelper; this.mappingUpdatedAction = mappingUpdatedAction; diff --git a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java index 032fe83e2220b..9d60706d1f100 100644 --- a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java @@ -54,6 +54,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportException; @@ -93,7 +94,8 @@ public TransportResyncReplicationAction( ShardStateAction shardStateAction, ActionFilters actionFilters, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -109,7 +111,8 @@ public TransportResyncReplicationAction( EXECUTOR_NAME_FUNCTION, true, /* we should never reject resync because of thread pool capacity on primary */ 
indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java index a0b5299805868..9ebfa8cfd0df8 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java @@ -59,6 +59,11 @@ import org.opensearch.index.translog.Translog.Location; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -82,6 +87,7 @@ public abstract class TransportWriteAction< protected final SystemIndices systemIndices; private final Function executorFunction; + private final Tracer tracer; protected TransportWriteAction( Settings settings, @@ -97,7 +103,8 @@ protected TransportWriteAction( Function executorFunction, boolean forceExecutionOnPrimary, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { // We pass ThreadPool.Names.SAME to the super class as we control the dispatching to the // ThreadPool.Names.WRITE/ThreadPool.Names.SYSTEM_WRITE thread pools in this class. @@ -119,6 +126,7 @@ protected TransportWriteAction( this.executorFunction = executorFunction; this.indexingPressureService = indexingPressureService; this.systemIndices = systemIndices; + this.tracer = tracer; } protected String executor(IndexShard shard) { @@ -220,7 +228,12 @@ protected void shardOperationOnPrimary( threadPool.executor(executor).execute(new ActionRunnable>(listener) { @Override protected void doRun() { - dispatchedShardOperationOnPrimary(request, primary, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnPrimary", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnPrimary(request, primary, TraceableActionListener.create(listener, span, tracer)); + } } @Override @@ -248,7 +261,12 @@ protected void shardOperationOnReplica(ReplicaRequest request, IndexShard replic threadPool.executor(executorFunction.apply(replica)).execute(new ActionRunnable(listener) { @Override protected void doRun() { - dispatchedShardOperationOnReplica(request, replica, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnReplica", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnReplica(request, replica, TraceableActionListener.create(listener, span, tracer)); + } } @Override diff --git a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java index f74fc7eefe65c..ca3c7e1d49700 100644 --- a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java +++ b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java @@ -62,6 +62,7 @@ import 
org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; @@ -99,7 +100,8 @@ public RetentionLeaseSyncAction( final ShardStateAction shardStateAction, final ActionFilters actionFilters, final IndexingPressureService indexingPressureService, - final SystemIndices systemIndices + final SystemIndices systemIndices, + final Tracer tracer ) { super( settings, @@ -115,7 +117,8 @@ public RetentionLeaseSyncAction( ignore -> ThreadPool.Names.MANAGEMENT, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java index a9514c298ef88..b6b2cf360d1c5 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java @@ -69,4 +69,29 @@ private AttributeNames() { * Action Name. */ public static final String TRANSPORT_ACTION = "action"; + + /** + * Index Name + */ + public static final String INDEX = "index"; + + /** + * Shard ID + */ + public static final String SHARD_ID = "shard_id"; + + /** + * Number of request items in bulk request + */ + public static final String BULK_REQUEST_ITEMS = "bulk_request_items"; + + /** + * Node ID + */ + public static final String NODE_ID = "node_id"; + + /** + * Refresh Policy + */ + public static final String REFRESH_POLICY = "refresh_policy"; } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java index d97fbd371ab2a..1dce422943b7a 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java @@ -8,6 +8,8 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.action.bulk.BulkShardRequest; +import org.opensearch.action.support.replication.ReplicatedWriteRequest; import org.opensearch.common.annotation.InternalApi; import org.opensearch.core.common.Strings; import org.opensearch.http.HttpRequest; @@ -68,6 +70,10 @@ public static SpanCreationContext from(String action, Transport.Connection conne return SpanCreationContext.server().name(createSpanName(action, connection)).attributes(buildSpanAttributes(action, connection)); } + public static SpanCreationContext from(String spanName, String nodeId, ReplicatedWriteRequest request) { + return SpanCreationContext.server().name(spanName).attributes(buildSpanAttributes(nodeId, request)); + } + private static String createSpanName(HttpRequest httpRequest) { return httpRequest.method().name() + SEPARATOR + httpRequest.uri(); } @@ -150,4 +156,18 @@ private static Attributes buildSpanAttributes(String action, TcpChannel tcpChann return attributes; } + private static Attributes buildSpanAttributes(String nodeId, ReplicatedWriteRequest request) { + Attributes attributes = Attributes.create() + .addAttribute(AttributeNames.NODE_ID, nodeId) + .addAttribute(AttributeNames.REFRESH_POLICY, request.getRefreshPolicy().getValue()); + if (request.shardId() != null) { + attributes.addAttribute(AttributeNames.INDEX, request.shardId().getIndexName()) + .addAttribute(AttributeNames.SHARD_ID, 
request.shardId().getId()); + } + if (request instanceof BulkShardRequest) { + attributes.addAttribute(AttributeNames.BULK_REQUEST_ITEMS, ((BulkShardRequest) request).items().length); + } + return attributes; + } + } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java index 0f67eff26cbde..cf7080ab2fc06 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java @@ -54,6 +54,7 @@ import org.opensearch.index.VersionType; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -155,7 +156,8 @@ private void indicesThatCannotBeCreatedTestCase( new ClusterService(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) { @Override void executeBulk( diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index 515f6eae28a34..141c630b94020 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -70,6 +70,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.ingest.IngestService; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -172,7 +173,8 @@ class TestTransportBulkAction extends TransportBulkAction { new ClusterService(SETTINGS, new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java index 10cad6fb147a2..6bbd740df7f9c 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java @@ -118,7 +118,8 @@ class TestTransportBulkAction extends TransportBulkAction { new AutoCreateIndex(Settings.EMPTY, clusterService.getClusterSettings(), new Resolver(), new SystemIndices(emptyMap())), new IndexingPressureService(Settings.EMPTY, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java index 852e3837e1e7a..9d5b4430ea395 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java @@ -282,7 +282,8 
@@ static class TestTransportBulkAction extends TransportBulkAction { new IndexingPressureService(Settings.EMPTY, clusterService), null, new SystemIndices(emptyMap()), - relativeTimeProvider + relativeTimeProvider, + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index fe0fdd07025d9..b325cfa197933 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -88,6 +88,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; @@ -1074,7 +1075,8 @@ public void testHandlePrimaryTermValidationRequestWithDifferentAllocationId() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId + "-1", 1, shardId), @@ -1105,7 +1107,8 @@ public void testHandlePrimaryTermValidationRequestWithOlderPrimaryTerm() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1136,7 +1139,8 @@ public void testHandlePrimaryTermValidationRequestSuccess() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1178,7 +1182,8 @@ private TransportShardBulkAction createAction() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java index 3bd8930064563..da87a0a967f53 100644 --- a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java +++ b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java @@ -203,7 +203,8 @@ public void testResyncDoesNotBlockOnPrimaryAction() throws Exception { shardStateAction, new ActionFilters(new HashSet<>()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertThat(action.globalBlockLevel(), nullValue()); @@ -256,7 +257,8 @@ private TransportResyncReplicationAction createAction() { mock(ShardStateAction.class), new ActionFilters(new HashSet<>()), mock(IndexingPressureService.class), - new SystemIndices(emptyMap()) + new 
SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java index 4a2185d1558f7..7212b1f5efe13 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java @@ -392,7 +392,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, TransportWriteActionForIndexingPressureTests.this.indexingPressureService, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java index 9d2069ac16190..b4549f82230bf 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java @@ -477,7 +477,8 @@ protected TestAction(boolean withDocumentFailureOnPrimary, boolean withDocumentF ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(Settings.EMPTY, TransportWriteActionTests.this.clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = withDocumentFailureOnPrimary; this.withDocumentFailureOnReplica = withDocumentFailureOnReplica; @@ -505,7 +506,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = false; this.withDocumentFailureOnReplica = false; diff --git a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java index d9bca55a208c2..63a9ac2f2e8ec 100644 --- a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java @@ -125,7 +125,8 @@ public void testRetentionLeaseSyncActionOnPrimary() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -162,7 +163,8 @@ public void testRetentionLeaseSyncActionOnReplica() throws Exception { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -203,7 +205,8 @@ public void testBlocks() { shardStateAction, new ActionFilters(Collections.emptySet()), new 
IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertNull(action.indexBlockLevel()); @@ -233,7 +236,8 @@ private RetentionLeaseSyncAction createAction() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 2f9f38d18a064..710717532ceb4 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2124,7 +2124,8 @@ public void onFailure(final Exception e) { shardStateAction, actionFilters, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ), new GlobalCheckpointSyncAction( @@ -2190,7 +2191,8 @@ public void onFailure(final Exception e) { mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); actions.put( BulkAction.INSTANCE, @@ -2214,7 +2216,8 @@ public void onFailure(final Exception e) { new AutoCreateIndex(settings, clusterSettings, indexNameExpressionResolver, new SystemIndices(emptyMap())), new IndexingPressureService(settings, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ); final RestoreService restoreService = new RestoreService( From a1fde65fe2ad1acda4364cf5fb751e9b494327ce Mon Sep 17 00:00:00 2001 From: Ashish Date: Fri, 20 Oct 2023 21:06:42 +0530 Subject: [PATCH 14/26] [Remote Store] Fix relocation failure due to transport receive timeout (#10761) * [Remote Store] Fix relocation failure due to transport receive timeout Signed-off-by: Ashish Singh * Fix existing extended shardIdle for remote backed shards Signed-off-by: Ashish Singh * Incorporate PR review comments Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- .../opensearch/remotestore/RemoteStoreIT.java | 23 +++++++++++++++++++ .../org/opensearch/index/IndexSettings.java | 3 +++ .../opensearch/index/shard/IndexShard.java | 6 ++++- .../translog/InternalTranslogManager.java | 10 ++++---- .../index/translog/RemoteFsTranslog.java | 5 ++++ .../opensearch/index/translog/Translog.java | 4 ++++ .../recovery/PeerRecoverySourceService.java | 3 ++- .../recovery/RemoteRecoveryTargetHandler.java | 13 +++++++++-- .../index/shard/RemoteIndexShardTests.java | 9 ++++++++ .../SegmentReplicationIndexShardTests.java | 12 ++++++---- 10 files changed, 75 insertions(+), 13 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 1fb5c2052aded..b3b4f8e10fd31 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -509,4 +509,27 @@ public void testRestoreSnapshotToIndexWithSameNameDifferentUUID() throws Excepti assertHitCount(client(dataNodes.get(1)).prepareSearch(INDEX_NAME).setSize(0).get(), 50); }); } + + public void 
testNoSearchIdleForAnyReplicaCount() throws ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + String primaryShardNode = internalCluster().startDataOnlyNodes(1).get(0); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureGreen(INDEX_NAME); + IndexShard indexShard = getIndexShard(primaryShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + + String replicaShardNode = internalCluster().startDataOnlyNodes(1).get(0); + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)) + ); + ensureGreen(INDEX_NAME); + assertFalse(indexShard.isSearchIdleSupported()); + + indexShard = getIndexShard(replicaShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + } } diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e90e9259f6a5c..99d2b5a74c406 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -1024,6 +1024,9 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } private void setSearchIdleAfter(TimeValue searchIdleAfter) { + if (this.isRemoteStoreEnabled) { + logger.warn("Search idle is not supported for remote backed indices"); + } if (this.replicationType == ReplicationType.SEGMENT && this.getNumberOfReplicas() > 0) { logger.warn("Search idle is not supported for indices with replicas using 'replication.type: SEGMENT'"); } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 5ebfd3863a6cf..1b7d1b2716979 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4425,7 +4425,6 @@ public final boolean isSearchIdle() { } /** - * * Returns true if this shard supports search idle. *

* Indices using Segment Replication will ignore search idle unless there are no replicas. @@ -4434,6 +4433,11 @@ public final boolean isSearchIdle() { * a new set of segments. */ public final boolean isSearchIdleSupported() { + // If the index is remote store backed, then search idle is not supported. This is to ensure that async refresh + // task continues to upload to remote store periodically. + if (isRemoteTranslogEnabled()) { + return false; + } return indexSettings.isSegRepEnabled() == false || indexSettings.getNumberOfReplicas() == 0; } diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index 85c52b907d326..4d0fc13d433c6 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -430,10 +430,10 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { - final long translogGenerationOfLastCommit = translog.getMinGenerationForSeqNo( - localCheckpointOfLastCommit + 1 - ).translogFileGeneration; - if (translog.sizeInBytesByMinGen(translogGenerationOfLastCommit) < flushThreshold) { + // This is the minimum seqNo that is referred in translog and considered for calculating translog size + long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); + final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; + if (translog.sizeInBytesByMinGen(minReferencedTranslogGeneration) < flushThreshold) { return false; } /* @@ -454,7 +454,7 @@ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long fl final long translogGenerationOfNewCommit = translog.getMinGenerationForSeqNo( localCheckpointTrackerSupplier.get().getProcessedCheckpoint() + 1 ).translogFileGeneration; - return translogGenerationOfLastCommit < translogGenerationOfNewCommit + return minReferencedTranslogGeneration < translogGenerationOfNewCommit || localCheckpointTrackerSupplier.get().getProcessedCheckpoint() == localCheckpointTrackerSupplier.get().getMaxSeqNo(); } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 29c825fd383c5..2dd9b1a545d4a 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -544,4 +544,9 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) thro } } } + + @Override + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minSeqNoToKeep; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index cf7f18840a03e..b44aa6e059224 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2034,4 +2034,8 @@ public static String createEmptyTranslog( writer.close(); return uuid; } + + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minUnrefCheckpointInLastCommit; + } } diff --git 
a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java index 6c7632a8a408d..cb2bedf00de99 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java @@ -376,7 +376,8 @@ private Tuple createRecovery transportService, request.targetNode(), recoverySettings, - throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime) + throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime), + shard.isRemoteTranslogEnabled() ); handler = RecoverySourceHandlerFactory.create(shard, recoveryTarget, request, recoverySettings); return Tuple.tuple(handler, recoveryTarget); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java index 66f5b13449f05..37227596fdfe7 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java @@ -75,6 +75,7 @@ public class RemoteRecoveryTargetHandler implements RecoveryTargetHandler { private final AtomicLong requestSeqNoGenerator = new AtomicLong(0); private final RetryableTransportClient retryableTransportClient; private final RemoteSegmentFileChunkWriter fileChunkWriter; + private final boolean remoteStoreEnabled; public RemoteRecoveryTargetHandler( long recoveryId, @@ -82,7 +83,8 @@ public RemoteRecoveryTargetHandler( TransportService transportService, DiscoveryNode targetNode, RecoverySettings recoverySettings, - Consumer onSourceThrottle + Consumer onSourceThrottle, + boolean remoteStoreEnabled ) { this.transportService = transportService; // It is safe to pass the retry timeout value here because RemoteRecoveryTargetHandler @@ -111,6 +113,7 @@ public RemoteRecoveryTargetHandler( requestSeqNoGenerator, onSourceThrottle ); + this.remoteStoreEnabled = remoteStoreEnabled; } public DiscoveryNode targetNode() { @@ -129,7 +132,13 @@ public void prepareForTranslogOperations(int totalTranslogOps, ActionListener reader = in -> TransportResponse.Empty.INSTANCE; final ActionListener responseListener = ActionListener.map(listener, r -> null); - retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + if (remoteStoreEnabled) { + // If remote store is enabled, during the prepare_translog phase, translog is also downloaded on the + // target host along with incremental segments download. + retryableTransportClient.executeRetryableAction(action, request, translogOpsRequestOptions, responseListener, reader); + } else { + retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + } } @Override diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index fe389e3b3fcb4..703a7d457d5b6 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -471,6 +471,15 @@ public void onReplicationFailure( } } + @Override + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. 
+ assertFalse(primary.isSearchIdleSupported()); + assertTrue(primary.isSearchIdle()); + assertTrue(primary.scheduledRefresh()); + assertFalse(primary.hasRefreshPending()); + } + private void assertSingleSegmentFile(IndexShard shard, String fileName) throws IOException { final Set segmentsFileNames = Arrays.stream(shard.store().directory().listAll()) .filter(file -> file.startsWith(IndexFileNames.SEGMENTS)) diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index eab38bfe5c64d..7caff3e5f5479 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -436,13 +436,17 @@ public void testShardIdleWithNoReplicas() throws Exception { shards.startAll(); final IndexShard primary = shards.getPrimary(); shards.indexDocs(randomIntBetween(1, 10)); - // ensure search idle conditions are met. - assertTrue(primary.isSearchIdle()); - assertFalse(primary.scheduledRefresh()); - assertTrue(primary.hasRefreshPending()); + validateShardIdleWithNoReplicas(primary); } } + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertTrue(primary.isSearchIdle()); + assertFalse(primary.scheduledRefresh()); + assertTrue(primary.hasRefreshPending()); + } + /** * here we are starting a new primary shard in PrimaryMode and testing if the shard publishes checkpoint after refresh. */ From ffe9371b38f98a2c9314bb6cfed4f8f224ff085c Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 20 Oct 2023 12:40:04 -0400 Subject: [PATCH 15/26] Update Github pull request template to have a task for inspecting failing checks (#10751) * Update Github pull request template to have a task for inspecting failing checks Signed-off-by: Andriy Redko * Add failing builds troubleshooting section to CONTRIBUTING.md Signed-off-by: Andriy Redko * Address review comments Signed-off-by: Andriy Redko --------- Signed-off-by: Andriy Redko --- .github/pull_request_template.md | 1 + CONTRIBUTING.md | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c47b9e0b69256..908a032bf833e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,6 +17,7 @@ Resolves #[Issue number to be closed when this PR is merged] - [ ] All tests pass - [ ] New functionality has been documented. 
- [ ] New functionality has javadoc added +- [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds)) - [ ] Commits are signed per the DCO using --signoff - [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d379d78829318..4a1162cf2558b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,6 +8,7 @@ - [Developer Certificate of Origin](#developer-certificate-of-origin) - [Changelog](#changelog) - [Review Process](#review-process) + - [Troubleshooting Failing Builds](#troubleshooting-failing-builds) # Contributing to OpenSearch @@ -162,3 +163,14 @@ During the PR process, expect that there will be some back-and-forth. Please try If we accept the PR, a [maintainer](MAINTAINERS.md) will merge your change and usually take care of backporting it to appropriate branches ourselves. If we reject the PR, we will close the pull request with a comment explaining why. This decision isn't always final: if you feel we have misunderstood your intended change or otherwise think that we should reconsider then please continue the conversation with a comment on the PR and we'll do our best to address any further points you raise. + +## Troubleshooting Failing Builds + +The OpenSearch testing framework offers many capabilities but exhibits significant complexity (it does a lot of randomization internally to cover as many edge cases and variations as possible). Unfortunately, this poses a challenge by making it harder to discover important issues/bugs in a straightforward way and may lead to so-called flaky tests - tests that flip randomly from success to failure without any code changes. + +If your pull request reports failing test(s) on one of the checks, please: + - check whether there is an existing [issue](https://github.com/opensearch-project/OpenSearch/issues) reported for the test in question + - if not, please make sure the failure is not caused by your changes by running the failing test(s) locally for some time + - if you are sure the failure is not related, please open a new [bug](https://github.com/opensearch-project/OpenSearch/issues/new?assignees=&labels=bug%2C+untriaged&projects=&template=bug_template.md&title=%5BBUG%5D) with the `flaky-test` label + - add a comment referencing the issue(s) or bug report(s) to your pull request explaining the failing build(s) + - as a bonus, try to contribute by fixing the flaky test(s) From 1e28738b8c966011bf1ae1f00431f0377761cb0a Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Fri, 20 Oct 2023 12:17:51 -0500 Subject: [PATCH 16/26] Increase remote recovery thread pool size (#10750) The remote recovery thread pool does blocking I/O when downloading files, so the "half processor count max 10" was definitely too small. This can be shown by triggering recoveries on a node that is also doing segment replication, and the replication lag will increase due to contention on that thread pool. Some amount of contention is inevitable, but the change here to increase the download thread pool, and also limit the concurrent usage of that thread pool by any single recovery/replication to 25% of the threads, does help.
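To make the sizing concrete, here is a minimal sketch of how the two new processor-based limits relate (illustrative only, not part of the change below; the class and method names are invented for this example, and the `allocatedProcessors` argument stands in for `OpenSearchExecutors.allocatedProcessors(settings)`):

// Illustrative sketch only: mirrors the two processor-based limits introduced below.
public final class RemoteRecoverySizingSketch {

    // New dynamic default for indices.recovery.max_concurrent_remote_store_streams:
    // half the allocated processor count, floored at 1.
    static int defaultMaxConcurrentRemoteStoreStreams(int allocatedProcessors) {
        return Math.max(1, allocatedProcessors / 2);
    }

    // New REMOTE_RECOVERY scaling pool maximum: twice the allocated processor count
    // (previously "half processor count, max 10").
    static int remoteRecoveryPoolMax(int allocatedProcessors) {
        return 2 * allocatedProcessors;
    }

    public static void main(String[] args) {
        for (int procs : new int[] { 2, 8, 16, 32 }) {
            int streams = defaultMaxConcurrentRemoteStoreStreams(procs);
            int poolMax = remoteRecoveryPoolMax(procs);
            // A single recovery's concurrent streams come out to roughly 25% of the pool.
            System.out.printf("procs=%d -> streams=%d, poolMax=%d (%.0f%%)%n", procs, streams, poolMax, 100.0 * streams / poolMax);
        }
    }
}

With the stream default at half the processor count and the pool maximum at twice it, a single recovery consumes roughly a quarter of the REMOTE_RECOVERY threads, which is the 25% limit described above.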
Long term, we can improve this even further by moving to fully async I/O to avoid blocking threads in the application on draining InputStreams. Signed-off-by: Andrew Ross --- .../org/opensearch/indices/recovery/RecoverySettings.java | 7 ++++--- .../main/java/org/opensearch/threadpool/ThreadPool.java | 7 ++++++- .../org/opensearch/threadpool/ScalingThreadPoolTests.java | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java index 44dfb2f4cb00a..0f3025369833d 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java @@ -41,6 +41,7 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; @@ -87,10 +88,10 @@ public class RecoverySettings { /** * Controls the maximum number of streams that can be started concurrently per recovery when downloading from the remote store. */ - public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = Setting.intSetting( + public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = new Setting<>( "indices.recovery.max_concurrent_remote_store_streams", - 10, - 1, + (s) -> Integer.toString(Math.max(1, OpenSearchExecutors.allocatedProcessors(s) / 2)), + (s) -> Setting.parseInt(s, 1, "indices.recovery.max_concurrent_remote_store_streams"), Property.Dynamic, Property.NodeScope ); diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index fab7620292dd2..5f10986239300 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -273,7 +273,12 @@ public ThreadPool( ); builders.put( Names.REMOTE_RECOVERY, - new ScalingExecutorBuilder(Names.REMOTE_RECOVERY, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)) + new ScalingExecutorBuilder( + Names.REMOTE_RECOVERY, + 1, + twiceAllocatedProcessors(allocatedProcessors), + TimeValue.timeValueMinutes(5) + ) ); if (FeatureFlags.isEnabled(FeatureFlags.CONCURRENT_SEGMENT_SEARCH)) { builders.put( diff --git a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java index ba2d4b8c247bb..19271bbf30e80 100644 --- a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java @@ -154,7 +154,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcesso sizes.put(ThreadPool.Names.TRANSLOG_SYNC, n -> 4 * n); sizes.put(ThreadPool.Names.REMOTE_PURGE, ThreadPool::halfAllocatedProcessorsMaxFive); sizes.put(ThreadPool.Names.REMOTE_REFRESH_RETRY, ThreadPool::halfAllocatedProcessorsMaxTen); - sizes.put(ThreadPool.Names.REMOTE_RECOVERY, ThreadPool::halfAllocatedProcessorsMaxTen); + sizes.put(ThreadPool.Names.REMOTE_RECOVERY, ThreadPool::twiceAllocatedProcessors); return sizes.get(threadPoolName).apply(numberOfProcessors); } From 4f8bcff554fe3b019d4149ddcdcc634120c67dd3 
Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Fri, 20 Oct 2023 15:57:17 -0700 Subject: [PATCH 17/26] Return unformatted Segment Replication metrics that take upload time into account for replication lag (#10723) * Return unformatted segrep metrics in stats Signed-off-by: Poojita Raj * Take upload time into account for replication time lag Signed-off-by: Poojita Raj * unformat segrep stats Signed-off-by: Poojita Raj * remove unused field names Signed-off-by: Poojita Raj --------- Signed-off-by: Poojita Raj --- .../main/java/org/opensearch/index/ReplicationStats.java | 8 +++----- .../main/java/org/opensearch/index/shard/IndexShard.java | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/ReplicationStats.java b/server/src/main/java/org/opensearch/index/ReplicationStats.java index 9cc6685c75f80..0ae4526365bf1 100644 --- a/server/src/main/java/org/opensearch/index/ReplicationStats.java +++ b/server/src/main/java/org/opensearch/index/ReplicationStats.java @@ -8,11 +8,9 @@ package org.opensearch.index; -import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; @@ -76,9 +74,9 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.SEGMENT_REPLICATION); - builder.field(Fields.MAX_BYTES_BEHIND, new ByteSizeValue(maxBytesBehind).toString()); - builder.field(Fields.TOTAL_BYTES_BEHIND, new ByteSizeValue(totalBytesBehind).toString()); - builder.field(Fields.MAX_REPLICATION_LAG, new TimeValue(maxReplicationLag)); + builder.field(Fields.MAX_BYTES_BEHIND, maxBytesBehind); + builder.field(Fields.TOTAL_BYTES_BEHIND, totalBytesBehind); + builder.field(Fields.MAX_REPLICATION_LAG, maxReplicationLag); builder.endObject(); return builder; } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 1b7d1b2716979..f990a3b56e856 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -3010,7 +3010,7 @@ public ReplicationStats getReplicationStats() { long maxBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).max().orElse(0L); long totalBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).sum(); long maxReplicationLag = stats.stream() - .mapToLong(SegmentReplicationShardStats::getCurrentReplicationTimeMillis) + .mapToLong(SegmentReplicationShardStats::getCurrentReplicationLagMillis) .max() .orElse(0L); return new ReplicationStats(maxBytesBehind, totalBytesBehind, maxReplicationLag); From 1e9ec52dd9d5e7d4a3ffb7d37c8b7fdf4069c26d Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 20 Oct 2023 16:10:31 -0700 Subject: [PATCH 18/26] [Segment Replication] Fix Flaky test SegmentReplicationRelocationIT.testPrimaryRelocation (#10701) * Add primary mode check before asserting on primary mode. Signed-off-by: Rishikesh1159 * remove unnecessary shardRouting check.
Signed-off-by: Rishikesh1159 * Add test logging. Signed-off-by: Rishikesh1159 * Addressing comments on PR. Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 --- .../replication/SegmentReplicationRelocationIT.java | 2 ++ .../replication/SegmentReplicationSourceHandler.java | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java index dd832a63d1e66..dbe0b43441f54 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java @@ -26,6 +26,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.MockTransportService; import org.opensearch.transport.TransportService; @@ -55,6 +56,7 @@ private void createIndex(int replicaCount) { * This test verifies happy path when primary shard is relocated newly added node (target) in the cluster. Before * relocation and after relocation documents are indexed and documents are verified */ + @TestLogging(reason = "Getting trace logs from replication,shard and allocation package", value = "org.opensearch.indices.replication:TRACE, org.opensearch.index.shard:TRACE, org.opensearch.cluster.routing.allocation:TRACE") public void testPrimaryRelocation() throws Exception { final String oldPrimary = internalCluster().startNode(); createIndex(1); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java index e2c47b0fb3159..674c09311c645 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java @@ -12,8 +12,6 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.StepListener; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.routing.IndexShardRoutingTable; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.logging.Loggers; import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.ListenableFuture; @@ -22,7 +20,6 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.recovery.DelayRecoveryException; import org.opensearch.indices.recovery.FileChunkWriter; import org.opensearch.indices.recovery.MultiChunkTransfer; import org.opensearch.indices.replication.common.CopyState; @@ -146,12 +143,6 @@ public synchronized void sendFiles(GetSegmentFilesRequest request, ActionListene ); }; cancellableThreads.checkForCancel(); - final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); - ShardRouting targetShardRouting = routingTable.getByAllocationId(request.getTargetAllocationId()); - if (targetShardRouting == null) { - logger.debug("delaying replication of {} as it is not listed as assigned to target node {}", 
shard.shardId(), targetNode); - throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); - } final StepListener sendFileStep = new StepListener<>(); Set storeFiles = new HashSet<>(Arrays.asList(shard.store().directory().listAll())); From 51626d03f857ca840280d1e57cfb1bdfbba75e2d Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:13:43 -0700 Subject: [PATCH 19/26] [Segment Replication] Add Segment Replication backpressure rejection stats to _nodes/stats (#10656) * Initial WIP for adding segrep backpressure to node stats. Signed-off-by: Rishikesh1159 * Bind SegmentReplicationStatsTracker in Node.java Signed-off-by: Rishikesh1159 * remove additional segrep backpressure info from node stats Signed-off-by: Rishikesh1159 * fix metric name in node stats Signed-off-by: Rishikesh1159 * Fix compile error. Signed-off-by: Rishikesh1159 * Fix compile errors. Signed-off-by: Rishikesh1159 * Address comments on PR. Signed-off-by: Rishikesh1159 * Update java docs. Signed-off-by: Rishikesh1159 * Address comments on PR and fix compile errors. Signed-off-by: Rishikesh1159 * Address comments on PR. Signed-off-by: Rishikesh1159 * Update unit test. Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 Signed-off-by: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> --- .../admin/cluster/node/stats/NodeStats.java | 24 +++++++ .../cluster/node/stats/NodesStatsRequest.java | 1 + .../node/stats/TransportNodesStatsAction.java | 1 + .../stats/TransportClusterStatsAction.java | 1 + .../SegmentReplicationPressureService.java | 3 +- .../SegmentReplicationRejectionStats.java | 67 +++++++++++++++++++ .../index/SegmentReplicationStatsTracker.java | 8 +++ .../main/java/org/opensearch/node/Node.java | 4 ++ .../java/org/opensearch/node/NodeService.java | 7 ++ .../cluster/node/stats/NodeStatsTests.java | 18 +++++ .../opensearch/cluster/DiskUsageTests.java | 6 ++ ...egmentReplicationPressureServiceTests.java | 9 ++- .../SegmentReplicationStatsTrackerTests.java | 35 ++++++++++ .../snapshots/SnapshotResiliencyTests.java | 2 + .../MockInternalClusterInfoService.java | 1 + .../opensearch/test/InternalTestCluster.java | 1 + 16 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java create mode 100644 server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index e9bfa358103c8..3d37056956c69 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -46,6 +46,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.stats.IndexingPressureStats; import org.opensearch.index.stats.ShardIndexingPressureStats; import org.opensearch.index.store.remote.filecache.FileCacheStats; @@ -129,6 +130,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private SearchBackpressureStats searchBackpressureStats; + @Nullable + private
SegmentReplicationRejectionStats segmentReplicationRejectionStats; + @Nullable private ClusterManagerThrottlingStats clusterManagerThrottlingStats; @@ -211,6 +215,12 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + segmentReplicationRejectionStats = in.readOptionalWriteable(SegmentReplicationRejectionStats::new); + } else { + segmentReplicationRejectionStats = null; + } if (in.getVersion().onOrAfter(Version.V_2_12_0)) { repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); } else { @@ -244,6 +254,7 @@ public NodeStats( @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, @Nullable SearchPipelineStats searchPipelineStats, + @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, @Nullable RepositoriesStats repositoriesStats ) { super(node); @@ -271,6 +282,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; } @@ -415,6 +427,10 @@ public SearchPipelineStats getSearchPipelineStats() { } @Nullable + public SegmentReplicationRejectionStats getSegmentReplicationRejectionStats() { + return segmentReplicationRejectionStats; + } + public RepositoriesStats getRepositoriesStats() { return repositoriesStats; } @@ -465,6 +481,10 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(resourceUsageStats); } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(segmentReplicationRejectionStats); + } if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(repositoriesStats); } @@ -561,6 +581,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getSegmentReplicationRejectionStats() != null) { + getSegmentReplicationRejectionStats().toXContent(builder, params); + } + if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 88dff20354aa2..fc72668d36413 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -215,6 +215,7 @@ public enum Metric { TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), RESOURCE_USAGE_STATS("resource_usage_stats"), + SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index aa02f8e580f4a..99cf42cfdc4d0 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ 
b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -126,6 +126,7 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index f51fabbfb2388..5efec8b876435 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -170,6 +170,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java index 4284daf9ffef4..d9d480e7b2b27 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java @@ -106,10 +106,11 @@ public SegmentReplicationPressureService( ClusterService clusterService, IndicesService indicesService, ShardStateAction shardStateAction, + SegmentReplicationStatsTracker tracker, ThreadPool threadPool ) { this.indicesService = indicesService; - this.tracker = new SegmentReplicationStatsTracker(this.indicesService); + this.tracker = tracker; this.shardStateAction = shardStateAction; this.threadPool = threadPool; diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java new file mode 100644 index 0000000000000..9f9f150ebe2d7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.Version; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Segment replication rejection stats. 
+ * + * @opensearch.internal + */ +public class SegmentReplicationRejectionStats implements Writeable, ToXContentFragment { + + /** + * Total rejections due to segment replication backpressure + */ + private long totalRejectionCount; + + public SegmentReplicationRejectionStats(final long totalRejectionCount) { + this.totalRejectionCount = totalRejectionCount; + } + + public SegmentReplicationRejectionStats(StreamInput in) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.totalRejectionCount = in.readVLong(); + } + } + + public long getTotalRejectionCount() { + return totalRejectionCount; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("segment_replication_backpressure"); + builder.field("total_rejected_requests", totalRejectionCount); + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeVLong(totalRejectionCount); + } + } + + @Override + public String toString() { + return "SegmentReplicationRejectionStats{ totalRejectedRequestCount=" + totalRejectionCount + '}'; + } + +} diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java index 6d5c00c08caff..f5fc8aa1c1eea 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java @@ -33,6 +33,14 @@ public SegmentReplicationStatsTracker(IndicesService indicesService) { rejectionCount = ConcurrentCollections.newConcurrentMap(); } + public SegmentReplicationRejectionStats getTotalRejectionStats() { + return new SegmentReplicationRejectionStats(this.rejectionCount.values().stream().mapToInt(AtomicInteger::get).sum()); + } + + protected Map getRejectionCount() { + return rejectionCount; + } + public SegmentReplicationStats getStats() { Map stats = new HashMap<>(); for (IndexService indexService : indicesService) { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index c9148f382a028..711a90d424ac3 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; @@ -977,6 +978,7 @@ protected Node( transportService.getTaskManager() ); + final SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); RepositoriesModule repositoriesModule = new RepositoriesModule( this.environment, pluginsService.filterPlugins(RepositoryPlugin.class), @@ -1116,6 +1118,7 @@ protected Node( fileCache, taskCancellationMonitoringService, resourceUsageCollectorService, + segmentReplicationStatsTracker, repositoryService ); @@ -1246,6 +1249,7 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); 
b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); + b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); }); injector = modules.createInjector(); diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index e2d7bc2c86ba3..49dde0b81cac7 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -48,6 +48,7 @@ import org.opensearch.discovery.Discovery; import org.opensearch.http.HttpServerTransport; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.indices.IndicesService; import org.opensearch.ingest.IngestService; @@ -96,6 +97,8 @@ public class NodeService implements Closeable { private final TaskCancellationMonitoringService taskCancellationMonitoringService; private final RepositoriesService repositoriesService; + private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; + NodeService( Settings settings, ThreadPool threadPool, @@ -119,6 +122,7 @@ public class NodeService implements Closeable { FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, ResourceUsageCollectorService resourceUsageCollectorService, + SegmentReplicationStatsTracker segmentReplicationStatsTracker, RepositoriesService repositoriesService ) { this.settings = settings; @@ -146,6 +150,7 @@ public class NodeService implements Closeable { this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); + this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; } public NodeInfo info( @@ -226,6 +231,7 @@ public NodeStats stats( boolean taskCancellation, boolean searchPipelineStats, boolean resourceUsageStats, + boolean segmentReplicationTrackerStats, boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will @@ -256,6 +262,7 @@ public NodeStats stats( fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? this.taskCancellationMonitoringService.stats() : null, searchPipelineStats ? this.searchPipelineService.stats() : null, + segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, repositoriesStats ? 
this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 3491f18da9550..ebdd012006fb2 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -49,6 +49,7 @@ import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.remote.RemoteSegmentStats; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.RemoteTranslogStats; @@ -417,6 +418,17 @@ public void testSerialization() throws IOException { assertEquals(aResourceUsageStats.getTimestamp(), bResourceUsageStats.getTimestamp()); }); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = nodeStats.getSegmentReplicationRejectionStats(); + SegmentReplicationRejectionStats deserializedSegmentReplicationRejectionStats = deserializedNodeStats + .getSegmentReplicationRejectionStats(); + if (segmentReplicationRejectionStats == null) { + assertNull(deserializedSegmentReplicationRejectionStats); + } else { + assertEquals( + segmentReplicationRejectionStats.getTotalRejectionCount(), + deserializedSegmentReplicationRejectionStats.getTotalRejectionCount() + ); + } ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); if (scriptCacheStats == null) { @@ -812,6 +824,11 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { } nodesResourceUsageStats = new NodesResourceUsageStats(resourceUsageStatsMap); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = null; + if (frequently()) { + segmentReplicationRejectionStats = new SegmentReplicationRejectionStats(randomNonNegativeLong()); + } + ClusterManagerThrottlingStats clusterManagerThrottlingStats = null; if (frequently()) { clusterManagerThrottlingStats = new ClusterManagerThrottlingStats(); @@ -853,6 +870,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { null, null, null, + segmentReplicationRejectionStats, null ); } diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 6f03e87bf5824..f037b75dc16a3 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -192,6 +192,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -220,6 +221,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -248,6 +250,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -307,6 +310,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -335,6 +339,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -363,6 +368,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java 
b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java index 34fa13f0ba62c..478fdcb24f76a 100644 --- a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java @@ -278,6 +278,13 @@ private SegmentReplicationPressureService buildPressureService(Settings settings ClusterService clusterService = mock(ClusterService.class); when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); - return new SegmentReplicationPressureService(settings, clusterService, indicesService, shardStateAction, mock(ThreadPool.class)); + return new SegmentReplicationPressureService( + settings, + clusterService, + indicesService, + shardStateAction, + new SegmentReplicationStatsTracker(indicesService), + mock(ThreadPool.class) + ); } } diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java new file mode 100644 index 0000000000000..04423d583e8f9 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.OpenSearchTestCase; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; + +public class SegmentReplicationStatsTrackerTests extends OpenSearchTestCase { + + private IndicesService indicesService = mock(IndicesService.class); + + public void testRejectedCount() { + SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); + + // Verify that total rejection count is 0 on an empty rejectionCount map in statsTracker. + assertTrue(segmentReplicationStatsTracker.getRejectionCount().isEmpty()); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 0L); + + // Verify that total rejection count is 1 after incrementing rejectionCount. 
+ segmentReplicationStatsTracker.incrementRejectionCount(Mockito.mock(ShardId.class)); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 1L); + } + +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 710717532ceb4..b7a2baacba611 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -178,6 +178,7 @@ import org.opensearch.gateway.TransportNodesListGatewayStartedShards; import org.opensearch.index.IndexingPressureService; import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.remote.RemoteStorePressureService; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -2188,6 +2189,7 @@ public void onFailure(final Exception e) { clusterService, mock(IndicesService.class), mock(ShardStateAction.class), + mock(SegmentReplicationStatsTracker.class), mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 60a54110fd0b4..2ba4de5e54a67 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -122,6 +122,7 @@ List adjustNodesStats(List nodesStats) { nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), nodeStats.getSearchPipelineStats(), + nodeStats.getSegmentReplicationRejectionStats(), nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 898e125b94954..63d8f069bebea 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2722,6 +2722,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat( From 7c5a806d5bbee77c0c4a184a500bf5522a8d8cd7 Mon Sep 17 00:00:00 2001 From: Movva Ajaykumar Date: Sat, 21 Oct 2023 13:31:09 +0530 Subject: [PATCH 20/26] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting (#9286) * Changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting (#9286) Signed-off-by: Ajay Kumar Movva --- CHANGELOG.md | 1 + .../TransportReplicationAction.java | 10 +- .../common/network/NetworkModule.java | 16 +- .../common/settings/ClusterSettings.java | 8 +- .../main/java/org/opensearch/node/Node.java | 19 +- .../AdmissionControlService.java | 104 +++++++++ .../AdmissionControlSettings.java | 83 ++++++++ .../controllers/AdmissionController.java | 70 ++++++ .../CPUBasedAdmissionController.java | 55 +++++ .../controllers/package-info.java | 12 ++ .../enums/AdmissionControlMode.java | 66 ++++++ .../enums/TransportActionType.java | 45 ++++ .../admissioncontrol/enums/package-info.java | 12 ++ .../admissioncontrol/package-info.java | 12 ++ .../CPUBasedAdmissionControllerSettings.java | 110 ++++++++++ 
.../settings/package-info.java | 11 + .../AdmissionControlTransportHandler.java | 65 ++++++ .../AdmissionControlTransportInterceptor.java | 40 ++++ .../transport/package-info.java | 11 + .../ratelimitting/package-info.java | 12 ++ .../common/network/NetworkModuleTests.java | 199 +++++++++++++++++- .../AdmissionControlServiceTests.java | 140 ++++++++++++ .../AdmissionControlSettingsTests.java | 103 +++++++++ .../CPUBasedAdmissionControllerTests.java | 109 ++++++++++ .../enums/AdmissionControlModeTests.java | 29 +++ .../enums/TransportActionTypeTests.java | 27 +++ ...CPUBasedAdmissionControlSettingsTests.java | 153 ++++++++++++++ ...AdmissionControlTransportHandlerTests.java | 92 ++++++++ 28 files changed, 1595 insertions(+), 19 deletions(-) create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/package-info.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d7d492679c79d..374dd4ab57ee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 
+18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote cluster state] Upload global metadata in cluster state to remote store([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) - [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) - [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) +- [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java index b68bd13cfed80..ddebdc5530e70 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java @@ -134,6 +134,12 @@ public abstract class TransportReplicationAction< Setting.Property.NodeScope ); + /** + * Making primary and replica actions suffixes as constant + */ + public static final String PRIMARY_ACTION_SUFFIX = "[p]"; + public static final String REPLICA_ACTION_SUFFIX = "[r]"; + protected final ThreadPool threadPool; protected final TransportService transportService; protected final ClusterService clusterService; @@ -204,8 +210,8 @@ protected TransportReplicationAction( this.shardStateAction = shardStateAction; this.executor = executor; - this.transportPrimaryAction = actionName + "[p]"; - this.transportReplicaAction = actionName + "[r]"; + this.transportPrimaryAction = actionName + PRIMARY_ACTION_SUFFIX; + this.transportReplicaAction = actionName + REPLICA_ACTION_SUFFIX; this.initialRetryBackoffBound = REPLICATION_INITIAL_RETRY_BACKOFF_BOUND.get(settings); this.retryTimeout = REPLICATION_RETRY_TIMEOUT.get(settings); diff --git a/server/src/main/java/org/opensearch/common/network/NetworkModule.java b/server/src/main/java/org/opensearch/common/network/NetworkModule.java index 0734659d8ee72..821d48fccf48c 100644 --- a/server/src/main/java/org/opensearch/common/network/NetworkModule.java +++ b/server/src/main/java/org/opensearch/common/network/NetworkModule.java @@ -131,7 +131,7 @@ public final class NetworkModule { private final Map> transportFactories = new HashMap<>(); private final Map> transportHttpFactories = new HashMap<>(); - private final List transportIntercetors = new ArrayList<>(); + private final List transportInterceptors = new ArrayList<>(); /** * Creates a network module that custom networking classes can be plugged into. 
@@ -149,9 +149,13 @@ public NetworkModule( NetworkService networkService, HttpServerTransport.Dispatcher dispatcher, ClusterSettings clusterSettings, - Tracer tracer + Tracer tracer, + List transportInterceptors ) { this.settings = settings; + if (transportInterceptors != null) { + transportInterceptors.forEach(this::registerTransportInterceptor); + } for (NetworkPlugin plugin : plugins) { Map> httpTransportFactory = plugin.getHttpTransports( settings, @@ -180,11 +184,11 @@ public NetworkModule( for (Map.Entry> entry : transportFactory.entrySet()) { registerTransport(entry.getKey(), entry.getValue()); } - List transportInterceptors = plugin.getTransportInterceptors( + List pluginTransportInterceptors = plugin.getTransportInterceptors( namedWriteableRegistry, threadPool.getThreadContext() ); - for (TransportInterceptor interceptor : transportInterceptors) { + for (TransportInterceptor interceptor : pluginTransportInterceptors) { registerTransportInterceptor(interceptor); } } @@ -264,7 +268,7 @@ public Supplier getTransportSupplier() { * Registers a new {@link TransportInterceptor} */ private void registerTransportInterceptor(TransportInterceptor interceptor) { - this.transportIntercetors.add(Objects.requireNonNull(interceptor, "interceptor must not be null")); + this.transportInterceptors.add(Objects.requireNonNull(interceptor, "interceptor must not be null")); } /** @@ -272,7 +276,7 @@ private void registerTransportInterceptor(TransportInterceptor interceptor) { * @see #registerTransportInterceptor(TransportInterceptor) */ public TransportInterceptor getTransportInterceptor() { - return new CompositeTransportInterceptor(this.transportIntercetors); + return new CompositeTransportInterceptor(this.transportInterceptors); } static final class CompositeTransportInterceptor implements TransportInterceptor { diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 76883c200542e..7ac7da819b215 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -134,6 +134,8 @@ import org.opensearch.persistent.PersistentTasksClusterService; import org.opensearch.persistent.decider.EnableAssignmentDecider; import org.opensearch.plugins.PluginsService; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.rest.BaseRestHandler; import org.opensearch.script.ScriptService; @@ -682,7 +684,11 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, - IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING + IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, + AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT, + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT ) ) ); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 
711a90d424ac3..e80b768074fc7 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -197,6 +197,8 @@ import org.opensearch.plugins.SearchPlugin; import org.opensearch.plugins.SystemIndexPlugin; import org.opensearch.plugins.TelemetryPlugin; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.ratelimitting.admissioncontrol.transport.AdmissionControlTransportInterceptor; import org.opensearch.repositories.RepositoriesModule; import org.opensearch.repositories.RepositoriesService; import org.opensearch.rest.RestController; @@ -820,6 +822,7 @@ protected Node( remoteStoreStatsTrackerFactory, recoverySettings ); + final AliasValidator aliasValidator = new AliasValidator(); final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService, systemIndices); @@ -891,6 +894,17 @@ protected Node( final RestController restController = actionModule.getRestController(); + final AdmissionControlService admissionControlService = new AdmissionControlService( + settings, + clusterService.getClusterSettings(), + threadPool + ); + + AdmissionControlTransportInterceptor admissionControlTransportInterceptor = new AdmissionControlTransportInterceptor( + admissionControlService + ); + + List transportInterceptors = List.of(admissionControlTransportInterceptor); final NetworkModule networkModule = new NetworkModule( settings, pluginsService.filterPlugins(NetworkPlugin.class), @@ -903,8 +917,10 @@ protected Node( networkService, restController, clusterService.getClusterSettings(), - tracer + tracer, + transportInterceptors ); + Collection>> indexTemplateMetadataUpgraders = pluginsService.filterPlugins( Plugin.class ).stream().map(Plugin::getIndexTemplateMetadataUpgrader).collect(Collectors.toList()); @@ -1181,6 +1197,7 @@ protected Node( b.bind(IndexingPressureService.class).toInstance(indexingPressureService); b.bind(TaskResourceTrackingService.class).toInstance(taskResourceTrackingService); b.bind(SearchBackpressureService.class).toInstance(searchBackpressureService); + b.bind(AdmissionControlService.class).toInstance(admissionControlService); b.bind(UsageService.class).toInstance(usageService); b.bind(AggregationUsageService.class).toInstance(searchModule.getValuesSourceRegistry().getUsageService()); b.bind(NamedWriteableRegistry.class).toInstance(namedWriteableRegistry); diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java new file mode 100644 index 0000000000000..2cc409b0e4465 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController;
+import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import static org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER;
+
+/**
+ * Admission control Service that bootstraps and manages all the Admission Controllers in OpenSearch.
+ */
+public class AdmissionControlService {
+    private final ThreadPool threadPool;
+    public final AdmissionControlSettings admissionControlSettings;
+    private final ConcurrentMap<String, AdmissionController> ADMISSION_CONTROLLERS;
+    private static final Logger logger = LogManager.getLogger(AdmissionControlService.class);
+    private final ClusterSettings clusterSettings;
+    private final Settings settings;
+
+    /**
+     *
+     * @param settings Immutable settings instance
+     * @param clusterSettings ClusterSettings Instance
+     * @param threadPool ThreadPool Instance
+     */
+    public AdmissionControlService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) {
+        this.threadPool = threadPool;
+        this.admissionControlSettings = new AdmissionControlSettings(clusterSettings, settings);
+        this.ADMISSION_CONTROLLERS = new ConcurrentHashMap<>();
+        this.clusterSettings = clusterSettings;
+        this.settings = settings;
+        this.initialise();
+    }
+
+    /**
+     * Initialise and register all the admission controllers
+     */
+    private void initialise() {
+        // Initialise the different types of admission controllers
+        registerAdmissionController(CPU_BASED_ADMISSION_CONTROLLER);
+    }
+
+    /**
+     * Handler that triggers all the registered admission controllers for the given transport action
+     */
+    public void applyTransportAdmissionControl(String action) {
+        this.ADMISSION_CONTROLLERS.forEach((name, admissionController) -> { admissionController.apply(action); });
+    }
+
+    /**
+     *
+     * @param admissionControllerName name of the admissionController to register into the service.
+     */
+    public void registerAdmissionController(String admissionControllerName) {
+        AdmissionController admissionController = this.controllerFactory(admissionControllerName);
+        this.ADMISSION_CONTROLLERS.put(admissionControllerName, admissionController);
+    }
+
+    /**
+     * @return AdmissionController instance for the given name
+     */
+    private AdmissionController controllerFactory(String admissionControllerName) {
+        switch (admissionControllerName) {
+            case CPU_BASED_ADMISSION_CONTROLLER:
+                return new CPUBasedAdmissionController(admissionControllerName, this.settings, this.clusterSettings);
+            default:
+                throw new IllegalArgumentException("Not Supported AdmissionController : " + admissionControllerName);
+        }
+    }
+
+    /**
+     *
+     * @return list of the registered admissionControllers
+     */
+    public List<AdmissionController> getAdmissionControllers() {
+        return new ArrayList<>(this.ADMISSION_CONTROLLERS.values());
+    }
+
+    /**
+     *
+     * @param controllerName name of the admissionController
+     * @return the registered AdmissionController instance, or null if none is registered under that name
+     */
+    public AdmissionController getAdmissionController(String controllerName) {
+        return this.ADMISSION_CONTROLLERS.getOrDefault(controllerName, null);
+    }
+}
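For orientation between the diffs: a minimal sketch of how this service is exercised end to end, mirroring the unit tests added later in this patch. The class name, the use of the test-framework `TestThreadPool`, and the sample action string are illustrative assumptions, not part of the change:

```java
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController;
import org.opensearch.threadpool.TestThreadPool;
import org.opensearch.threadpool.ThreadPool;

import static org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER;

public class AdmissionControlServiceSketch {
    public static void main(String[] args) {
        ThreadPool threadPool = new TestThreadPool("admission-control-sketch");
        try {
            ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
            // The constructor bootstraps and registers the CPU based controller via initialise().
            AdmissionControlService service = new AdmissionControlService(Settings.EMPTY, clusterSettings, threadPool);

            // Every intercepted transport action funnels through this entry point.
            service.applyTransportAdmissionControl("indices:data/write/bulk[s][p]");

            AdmissionController cpuController = service.getAdmissionController(CPU_BASED_ADMISSION_CONTROLLER);
            // Prints "global_cpu_usage rejections: 0" - the default mode is disabled, so nothing is counted yet.
            System.out.println(cpuController.getName() + " rejections: " + cpuController.getRejectionCount());
        } finally {
            threadPool.shutdownNow();
        }
    }
}
```

diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java
new file mode 100644
index 0000000000000..b557190ab54ac
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java
@@ -0,0 +1,83 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol;
+
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+
+/**
+ * Settings related to admission control.
+ * @opensearch.internal
+ */
+public final class AdmissionControlSettings {
+
+    /**
+     * Default parameters for the AdmissionControlSettings
+     */
+    public static class Defaults {
+        public static final String MODE = "disabled";
+    }
+
+    /**
+     * Feature level setting to operate in shadow-mode or in enforced-mode. If the enforced mode is set,
+     * rejection will be performed; otherwise only rejection metrics will be populated.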
+     */
+    public static final Setting<AdmissionControlMode> ADMISSION_CONTROL_TRANSPORT_LAYER_MODE = new Setting<>(
+        "admission_control.transport.mode",
+        Defaults.MODE,
+        AdmissionControlMode::fromName,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    private volatile AdmissionControlMode transportLayeradmissionControlMode;
+
+    /**
+     * @param clusterSettings clusterSettings Instance
+     * @param settings settings instance
+     */
+    public AdmissionControlSettings(ClusterSettings clusterSettings, Settings settings) {
+        this.transportLayeradmissionControlMode = ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.get(settings);
+        clusterSettings.addSettingsUpdateConsumer(ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, this::setAdmissionControlTransportLayerMode);
+    }
+
+    /**
+     *
+     * @param admissionControlMode the updated mode of the admission control feature
+     */
+    private void setAdmissionControlTransportLayerMode(AdmissionControlMode admissionControlMode) {
+        this.transportLayeradmissionControlMode = admissionControlMode;
+    }
+
+    /**
+     *
+     * @return the currently configured mode of the admission control feature
+     */
+    public AdmissionControlMode getAdmissionControlTransportLayerMode() {
+        return this.transportLayeradmissionControlMode;
+    }
+
+    /**
+     *
+     * @return true if the admission control feature is in enforced mode, else false
+     */
+    public Boolean isTransportLayerAdmissionControlEnforced() {
+        return this.transportLayeradmissionControlMode == AdmissionControlMode.ENFORCED;
+    }
+
+    /**
+     *
+     * @return true if the admission control feature is enabled, else false
+     */
+    public Boolean isTransportLayerAdmissionControlEnabled() {
+        return this.transportLayeradmissionControlMode != AdmissionControlMode.DISABLED;
+    }
+}
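A short sketch of how these mode flags behave under dynamic updates, mirroring the settings tests later in this patch; the wrapper class and `main` method are illustrative assumptions only:

```java
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings;
import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;

public class AdmissionControlSettingsSketch {
    public static void main(String[] args) {
        // Start in monitor_only: enabled, but not enforced.
        Settings initial = Settings.builder()
            .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode())
            .build();
        ClusterSettings clusterSettings = new ClusterSettings(initial, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
        AdmissionControlSettings acSettings = new AdmissionControlSettings(clusterSettings, initial);

        System.out.println(acSettings.isTransportLayerAdmissionControlEnabled());  // true
        System.out.println(acSettings.isTransportLayerAdmissionControlEnforced()); // false

        // The consumer registered in the constructor picks up dynamic updates.
        clusterSettings.applySettings(
            Settings.builder()
                .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.ENFORCED.getMode())
                .build()
        );
        System.out.println(acSettings.isTransportLayerAdmissionControlEnforced()); // true
    }
}
```

diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java
new file mode 100644
index 0000000000000..00564a9967f31
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.controllers;
+
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Abstract class for Admission Controller in OpenSearch, which aims to provide resource based request admission control.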
+ * It provides methods for any tracking-object that can be incremented (such as memory size),
+ * and admission control can be applied if the configured limit has been reached
+ */
+public abstract class AdmissionController {
+
+    private final AtomicLong rejectionCount;
+    private final String admissionControllerName;
+
+    /**
+     *
+     * @param rejectionCount initialised rejectionCount value for the AdmissionController
+     * @param admissionControllerName name of the admissionController
+     */
+    public AdmissionController(AtomicLong rejectionCount, String admissionControllerName) {
+        this.rejectionCount = rejectionCount;
+        this.admissionControllerName = admissionControllerName;
+    }
+
+    /**
+     * Returns the current enabled state of the admission controller for the given mode
+     * @return true if the admissionController is enabled for the transport layer, else false
+     */
+    public boolean isEnabledForTransportLayer(AdmissionControlMode admissionControlMode) {
+        return admissionControlMode != AdmissionControlMode.DISABLED;
+    }
+
+    /**
+     * Increment the tracking-objects and apply the admission control if the threshold is breached.
+     * Mostly applicable while applying admission control.
+     */
+    public abstract void apply(String action);
+
+    /**
+     * @return name of the admission-controller
+     */
+    public String getName() {
+        return this.admissionControllerName;
+    }
+
+    /**
+     * Adds to the rejection count of the controller. Primarily used when copying controller states.
+     * @param count the value to add to the tracked rejection count
+     */
+    public void addRejectionCount(long count) {
+        this.rejectionCount.addAndGet(count);
+    }
+
+    /**
+     * @return current value of the rejection count metric tracked by the admission-controller.
+     */
+    public long getRejectionCount() {
+        return this.rejectionCount.get();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java
new file mode 100644
index 0000000000000..3a8956b2cce87
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java
@@ -0,0 +1,55 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.controllers;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Class for CPU Based Admission Controller in OpenSearch, which aims to provide CPU utilisation admission control.
+ * It provides methods to apply admission control if the configured limit has been reached
+ */
+public class CPUBasedAdmissionController extends AdmissionController {
+    private static final Logger LOGGER = LogManager.getLogger(CPUBasedAdmissionController.class);
+    public CPUBasedAdmissionControllerSettings settings;
+
+    /**
+     *
+     * @param admissionControllerName name of the admission controller
+     */
+    public CPUBasedAdmissionController(String admissionControllerName, Settings settings, ClusterSettings clusterSettings) {
+        super(new AtomicLong(0), admissionControllerName);
+        this.settings = new CPUBasedAdmissionControllerSettings(clusterSettings, settings);
+    }
+
+    /**
+     * This function will take care of applying admission control based on CPU usage
+     * @param action is the transport action
+     */
+    @Override
+    public void apply(String action) {
+        // TODO Will extend this logic further; currently just incrementing the rejectionCount
+        if (this.isEnabledForTransportLayer(this.settings.getTransportLayerAdmissionControllerMode())) {
+            this.applyForTransportLayer(action);
+        }
+    }
+
+    private void applyForTransportLayer(String actionName) {
+        // currently incrementing counts (capped at 10) to verify in tests that the controller triggers as expected
+        // TODO will update the rejection logic further in next PR's
+        if (this.getRejectionCount() < 10) {
+            this.addRejectionCount(1);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java
new file mode 100644
index 0000000000000..23746cc61a203
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * This package contains classes related to the different admission controllers
+ */
+package org.opensearch.ratelimitting.admissioncontrol.controllers;
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java
new file mode 100644
index 0000000000000..2ae2436ba84e7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java
@@ -0,0 +1,66 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.enums;
+
+import java.util.Locale;
+
+/**
+ * Defines the AdmissionControlMode
+ */
+public enum AdmissionControlMode {
+    /**
+     * AdmissionController is completely disabled.
+     */
+    DISABLED("disabled"),
+
+    /**
+     * AdmissionController only monitors the rejection criteria for the requests.
+     */
+    MONITOR("monitor_only"),
+
+    /**
+     * AdmissionController monitors and rejects tasks that exceed resource usage thresholds.
+     */
+    ENFORCED("enforced");
+
+    private final String mode;
+
+    /**
+     * @param mode update mode of the admission controller
+     */
+    AdmissionControlMode(String mode) {
+        this.mode = mode;
+    }
+
+    /**
+     *
+     * @return mode of the admission controller
+     */
+    public String getMode() {
+        return this.mode;
+    }
+
+    /**
+     *
+     * @param name name of the mode to resolve
+     * @return Enum of AdmissionControlMode based on the mode name
+     */
+    public static AdmissionControlMode fromName(String name) {
+        switch (name.toLowerCase(Locale.ROOT)) {
+            case "disabled":
+                return DISABLED;
+            case "monitor_only":
+                return MONITOR;
+            case "enforced":
+                return ENFORCED;
+            default:
+                throw new IllegalArgumentException("Invalid AdmissionControlMode: " + name);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java
new file mode 100644
index 0000000000000..f2fdca0cfe49b
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java
@@ -0,0 +1,45 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.enums;
+
+import java.util.Locale;
+
+/**
+ * Enum that defines the types of the transport requests
+ */
+public enum TransportActionType {
+    INDEXING("indexing"),
+    SEARCH("search");
+
+    private final String type;
+
+    TransportActionType(String uriType) {
+        this.type = uriType;
+    }
+
+    /**
+     *
+     * @return type of the request
+     */
+    public String getType() {
+        return type;
+    }
+
+    public static TransportActionType fromName(String name) {
+        name = name.toLowerCase(Locale.ROOT);
+        switch (name) {
+            case "indexing":
+                return INDEXING;
+            case "search":
+                return SEARCH;
+            default:
+                throw new IllegalArgumentException("Not Supported TransportAction Type: " + name);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java
new file mode 100644
index 0000000000000..98b08ebd0a7bf
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * This package contains enums related to the admission controller feature
+ */
+package org.opensearch.ratelimitting.admissioncontrol.enums;
+ */
+
+/**
+ * This package contains base classes needed for the admission control feature
+ */
+package org.opensearch.ratelimitting.admissioncontrol;
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java
new file mode 100644
index 0000000000000..141e9b68db145
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java
@@ -0,0 +1,110 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.settings;
+
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Settings related to the cpu based admission controller.
+ * @opensearch.internal
+ */
+public class CPUBasedAdmissionControllerSettings {
+    public static final String CPU_BASED_ADMISSION_CONTROLLER = "global_cpu_usage";
+
+    /**
+     * Default parameters for the CPUBasedAdmissionControllerSettings
+     */
+    public static class Defaults {
+        public static final long CPU_USAGE = 95;
+        public static List<String> TRANSPORT_LAYER_DEFAULT_URI_TYPE = Arrays.asList("indexing", "search");
+    }
+
+    private AdmissionControlMode transportLayerMode;
+    private Long searchCPULimit;
+    private Long indexingCPULimit;
+
+    private final List<String> transportActionsList;
+    /**
+     * Feature level setting to operate in shadow-mode or in enforced-mode. If the enforced mode is set,
+     * rejection will be performed; otherwise only rejection metrics will be populated.
+     */
+    public static final Setting<AdmissionControlMode> CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE = new Setting<>(
+        "admission_control.transport.cpu_usage.mode_override",
+        AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE,
+        AdmissionControlMode::fromName,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    /**
+     * This setting is used to set the CPU limit for search requests; by default it uses the default CPU usage limit
+     */
+    public static final Setting<Long> SEARCH_CPU_USAGE_LIMIT = Setting.longSetting(
+        "admission_control.search.cpu_usage.limit",
+        Defaults.CPU_USAGE,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    /**
+     * This setting is used to set the CPU limit for indexing requests; by default it uses the default CPU usage limit
+     */
+    public static final Setting<Long> INDEXING_CPU_USAGE_LIMIT = Setting.longSetting(
+        "admission_control.indexing.cpu_usage.limit",
+        Defaults.CPU_USAGE,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    // currently limited to a handful of settings; more settings will be added in follow-up PR's
+    public CPUBasedAdmissionControllerSettings(ClusterSettings clusterSettings, Settings settings) {
+        this.transportLayerMode = CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.get(settings);
+        clusterSettings.addSettingsUpdateConsumer(CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, this::setTransportLayerMode);
+        this.searchCPULimit = SEARCH_CPU_USAGE_LIMIT.get(settings);
+        this.indexingCPULimit = INDEXING_CPU_USAGE_LIMIT.get(settings);
+        this.transportActionsList = Defaults.TRANSPORT_LAYER_DEFAULT_URI_TYPE;
+        clusterSettings.addSettingsUpdateConsumer(INDEXING_CPU_USAGE_LIMIT, this::setIndexingCPULimit);
+        clusterSettings.addSettingsUpdateConsumer(SEARCH_CPU_USAGE_LIMIT, this::setSearchCPULimit);
+    }
+
+    private void setTransportLayerMode(AdmissionControlMode admissionControlMode) {
+        this.transportLayerMode = admissionControlMode;
+    }
+
+    public AdmissionControlMode getTransportLayerAdmissionControllerMode() {
+        return transportLayerMode;
+    }
+
+    public Long getSearchCPULimit() {
+        return searchCPULimit;
+    }
+
+    public Long getIndexingCPULimit() {
+        return indexingCPULimit;
+    }
+
+    public void setIndexingCPULimit(Long indexingCPULimit) {
+        this.indexingCPULimit = indexingCPULimit;
+    }
+
+    public void setSearchCPULimit(Long searchCPULimit) {
+        this.searchCPULimit = searchCPULimit;
+    }
+
+    public List<String> getTransportActionsList() {
+        return transportActionsList;
+    }
+}
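A sketch of how a node might configure these settings at startup; the wrapper class, the chosen limit value, and the printed output are illustrative assumptions:

```java
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Settings;
import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings;

public class CPUBasedSettingsSketch {
    public static void main(String[] args) {
        // Override the controller mode and tighten the indexing CPU limit at node startup.
        Settings settings = Settings.builder()
            .put(
                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
                AdmissionControlMode.ENFORCED.getMode()
            )
            .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), 85)
            .build();
        ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);

        CPUBasedAdmissionControllerSettings cpuSettings = new CPUBasedAdmissionControllerSettings(clusterSettings, settings);
        System.out.println(cpuSettings.getTransportLayerAdmissionControllerMode()); // ENFORCED
        System.out.println(cpuSettings.getIndexingCPULimit()); // 85
        System.out.println(cpuSettings.getSearchCPULimit());   // 95 (the shared default)
    }
}
```

diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java
new file mode 100644
index 0000000000000..a024ccc756745
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java
@@ -0,0 +1,11 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.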
+ */
+/**
+ * This package contains settings related classes for the different admission controllers
+ */
+package org.opensearch.ratelimitting.admissioncontrol.settings;
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java
new file mode 100644
index 0000000000000..7d0f5fbc17a51
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java
@@ -0,0 +1,65 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.transport;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException;
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
+import org.opensearch.tasks.Task;
+import org.opensearch.transport.TransportChannel;
+import org.opensearch.transport.TransportRequest;
+import org.opensearch.transport.TransportRequestHandler;
+
+/**
+ * AdmissionControl Handler to intercept Transport Requests.
+ * @param <T> Transport Request
+ */
+public class AdmissionControlTransportHandler<T extends TransportRequest> implements TransportRequestHandler<T> {
+
+    private final String action;
+    private final TransportRequestHandler<T> actualHandler;
+    protected final Logger log = LogManager.getLogger(this.getClass());
+    AdmissionControlService admissionControlService;
+    boolean forceExecution;
+
+    public AdmissionControlTransportHandler(
+        String action,
+        TransportRequestHandler<T> actualHandler,
+        AdmissionControlService admissionControlService,
+        boolean forceExecution
+    ) {
+        super();
+        this.action = action;
+        this.actualHandler = actualHandler;
+        this.admissionControlService = admissionControlService;
+        this.forceExecution = forceExecution;
+    }
+
+    /**
+     * @param request Transport Request that landed on the node
+     * @param channel Transport channel that allows sending a response to the request
+     * @param task Current task that is executing
+     * @throws Exception when admission control rejects the request
+     */
+    @Override
+    public void messageReceived(T request, TransportChannel channel, Task task) throws Exception {
+        // intercept all the transport requests here and apply admission control
+        try {
+            // TODO Need to evaluate if we need to apply admission control or not if force Execution is true; will update in next PR.
+            this.admissionControlService.applyTransportAdmissionControl(this.action);
+        } catch (final OpenSearchRejectedExecutionException openSearchRejectedExecutionException) {
+            log.warn(openSearchRejectedExecutionException.getMessage());
+            channel.sendResponse(openSearchRejectedExecutionException);
+        } catch (final Exception e) {
+            throw e;
+        }
+        actualHandler.messageReceived(request, channel, task);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java
new file mode 100644
index 0000000000000..01cfcbd780006
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.transport;
+
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
+import org.opensearch.transport.TransportInterceptor;
+import org.opensearch.transport.TransportRequest;
+import org.opensearch.transport.TransportRequestHandler;
+
+/**
+ * This class allows throttling to intercept requests on both the sender and the receiver side.
+ */
+public class AdmissionControlTransportInterceptor implements TransportInterceptor {
+
+    AdmissionControlService admissionControlService;
+
+    public AdmissionControlTransportInterceptor(AdmissionControlService admissionControlService) {
+        this.admissionControlService = admissionControlService;
+    }
+
+    /**
+     *
+     * @return admission control handler to intercept transport requests
+     */
+    @Override
+    public <T extends TransportRequest> TransportRequestHandler<T> interceptHandler(
+        String action,
+        String executor,
+        boolean forceExecution,
+        TransportRequestHandler<T> actualHandler
+    ) {
+        return new AdmissionControlTransportHandler<>(action, actualHandler, this.admissionControlService, forceExecution);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java
new file mode 100644
index 0000000000000..f97f31bc7b1db
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java
@@ -0,0 +1,11 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+/**
+ * This package contains transport related classes for the admission control feature
+ */
+package org.opensearch.ratelimitting.admissioncontrol.transport;
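To make the wiring concrete, a hypothetical sketch of what the interceptor produces when a handler is registered with the transport layer; the action name, executor choice, and wrapper class are illustrative assumptions, not part of the patch:

```java
import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
import org.opensearch.ratelimitting.admissioncontrol.transport.AdmissionControlTransportInterceptor;
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.transport.TransportRequest;
import org.opensearch.transport.TransportRequestHandler;

public class AdmissionControlInterceptorSketch {

    static TransportRequestHandler<TransportRequest> decorate(
        AdmissionControlService admissionControlService,
        TransportRequestHandler<TransportRequest> actualHandler
    ) {
        AdmissionControlTransportInterceptor interceptor = new AdmissionControlTransportInterceptor(admissionControlService);
        // Each registered handler is wrapped once, at registration time, in an
        // AdmissionControlTransportHandler that applies admission control before delegating.
        return interceptor.interceptHandler(
            "indices:data/write/bulk[s][p]", // transport action name seen by the controllers
            ThreadPool.Names.WRITE,          // executor, passed through unchanged
            false,                           // forceExecution
            actualHandler
        );
    }
}
```

diff --git a/server/src/main/java/org/opensearch/ratelimitting/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/package-info.java
new file mode 100644
index 0000000000000..c04358e14284f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.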
+ */ + +/** + * Base OpenSearch Throttling package + */ +package org.opensearch.ratelimitting; diff --git a/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java b/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java index 0ca118fe422a5..ab51cafb039c2 100644 --- a/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java +++ b/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java @@ -57,6 +57,7 @@ import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -124,7 +125,7 @@ public Map> getTransports( return Collections.singletonMap("custom", custom); } }; - NetworkModule module = newNetworkModule(settings, plugin); + NetworkModule module = newNetworkModule(settings, null, plugin); assertSame(custom, module.getTransportSupplier()); } @@ -135,7 +136,7 @@ public void testRegisterHttpTransport() { .build(); Supplier custom = FakeHttpTransport::new; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getHttpTransports( Settings settings, @@ -155,7 +156,7 @@ public Map> getHttpTransports( assertSame(custom, module.getHttpServerTransportSupplier()); settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); - NetworkModule newModule = newNetworkModule(settings); + NetworkModule newModule = newNetworkModule(settings, null); expectThrows(IllegalStateException.class, () -> newModule.getHttpServerTransportSupplier()); } @@ -169,7 +170,7 @@ public void testOverrideDefault() { Supplier customTransport = () -> null; // content doesn't matter we check reference equality Supplier custom = FakeHttpTransport::new; Supplier def = FakeHttpTransport::new; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getTransports( Settings settings, @@ -214,7 +215,7 @@ public void testDefaultKeys() { Supplier custom = FakeHttpTransport::new; Supplier def = FakeHttpTransport::new; Supplier customTransport = () -> null; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getTransports( Settings settings, @@ -273,7 +274,7 @@ public TransportRequestHandler interceptHandler( return actualHandler; } }; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public List getTransportInterceptors( NamedWriteableRegistry namedWriteableRegistry, @@ -295,7 +296,7 @@ public List getTransportInterceptors( assertSame(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.get(0), interceptor); NullPointerException nullPointerException = expectThrows(NullPointerException.class, () -> { - newNetworkModule(settings, new NetworkPlugin() { + newNetworkModule(settings, null, new NetworkPlugin() { @Override public List getTransportInterceptors( NamedWriteableRegistry namedWriteableRegistry, @@ -309,7 +310,186 @@ public List getTransportInterceptors( assertEquals("interceptor must not be null", nullPointerException.getMessage()); } - private NetworkModule 
newNetworkModule(Settings settings, NetworkPlugin... plugins) { + public void testRegisterCoreInterceptor() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(0, called.get()); + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + assertEquals(1, called.get()); + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + assertEquals(2, called.get()); + assertTrue(transportInterceptor instanceof NetworkModule.CompositeTransportInterceptor); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 1); + assertSame(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.get(0), interceptor); + } + + public void testInterceptorOrder() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + AtomicInteger called1 = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + TransportInterceptor interceptor1 = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called1.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor1); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors, new NetworkPlugin() { + @Override + public List getTransportInterceptors( + NamedWriteableRegistry namedWriteableRegistry, + ThreadContext threadContext + ) { + assertNotNull(threadContext); + return Collections.singletonList(interceptor); + } + }); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 2); + + assertEquals(0, called.get()); + assertEquals(0, called1.get()); + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + assertEquals(1, called.get()); + assertEquals(1, called1.get()); + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + assertEquals(2, called.get()); + 
assertEquals(2, called1.get()); + } + + public void testInterceptorOrderException() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + AtomicInteger called1 = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + TransportInterceptor interceptor1 = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called1.incrementAndGet(); + throw new RuntimeException("Handler Invoke Failed"); + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor1); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors, new NetworkPlugin() { + @Override + public List getTransportInterceptors( + NamedWriteableRegistry namedWriteableRegistry, + ThreadContext threadContext + ) { + assertNotNull(threadContext); + return Collections.singletonList(interceptor); + } + }); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 2); + + assertEquals(0, called.get()); + assertEquals(0, called1.get()); + try { + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + } catch (Exception e) { + assertEquals(0, called.get()); + assertEquals(1, called1.get()); + } + try { + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + } catch (Exception e) { + assertEquals(0, called.get()); + assertEquals(2, called1.get()); + } + } + + private NetworkModule newNetworkModule( + Settings settings, + List coreTransportInterceptors, + NetworkPlugin... plugins + ) { return new NetworkModule( settings, Arrays.asList(plugins), @@ -322,7 +502,8 @@ private NetworkModule newNetworkModule(Settings settings, NetworkPlugin... plugi null, new NullDispatcher(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - NoopTracer.INSTANCE + NoopTracer.INSTANCE, + coreTransportInterceptors ); } } diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java new file mode 100644 index 0000000000000..bac4eaf3fd677 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; + +public class AdmissionControlServiceTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + private AdmissionControlService admissionControlService; + private String action = ""; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + action = "indexing"; + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testWhenAdmissionControllerRegistered() { + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + assertEquals(admissionControlService.getAdmissionControllers().size(), 1); + } + + public void testRegisterInvalidAdmissionController() { + String test = "TEST"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + assertEquals(admissionControlService.getAdmissionControllers().size(), 1); + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> admissionControlService.registerAdmissionController(test) + ); + assertEquals(ex.getMessage(), "Not Supported AdmissionController : " + test); + } + + public void testAdmissionControllerSettings() { + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + AdmissionControlSettings admissionControlSettings = admissionControlService.admissionControlSettings; + List admissionControllerList = admissionControlService.getAdmissionControllers(); + assertEquals(admissionControllerList.size(), 1); + CPUBasedAdmissionController cpuBasedAdmissionController = (CPUBasedAdmissionController) admissionControlService + .getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals( + admissionControlSettings.isTransportLayerAdmissionControlEnabled(), + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + + Settings settings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.DISABLED.getMode()) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertEquals( + admissionControlSettings.isTransportLayerAdmissionControlEnabled(), + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + 
assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + + Settings newSettings = Settings.builder() + .put(settings) + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(newSettings); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue( + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + } + + public void testApplyAdmissionControllerDisabled() { + this.action = "indices:data/write/bulk[s][p]"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService.applyTransportAdmissionControl(this.action); + List admissionControllerList = admissionControlService.getAdmissionControllers(); + admissionControllerList.forEach(admissionController -> { assertEquals(admissionController.getRejectionCount(), 0); }); + } + + public void testApplyAdmissionControllerEnabled() { + this.action = "indices:data/write/bulk[s][p]"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService.applyTransportAdmissionControl(this.action); + assertEquals( + admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) + .getRejectionCount(), + 0 + ); + + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.MONITOR.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(settings); + admissionControlService.applyTransportAdmissionControl(this.action); + List admissionControllerList = admissionControlService.getAdmissionControllers(); + assertEquals(admissionControllerList.size(), 1); + assertEquals( + admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) + .getRejectionCount(), + 1 + ); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java new file mode 100644 index 0000000000000..c11ee1cc608f6 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; +import java.util.Set; + +public class AdmissionControlSettingsTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testSettingsExists() { + Set> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS; + assertTrue( + "All the admission controller settings should be supported built in settings", + settings.containsAll(List.of(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE)) + ); + } + + public void testDefaultSettings() { + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings( + clusterService.getClusterSettings(), + Settings.EMPTY + ); + + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + assertEquals(admissionControlSettings.getAdmissionControlTransportLayerMode().getMode(), AdmissionControlSettings.Defaults.MODE); + } + + public void testGetConfiguredSettings() { + Settings settings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.ENFORCED.getMode()) + .build(); + + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings); + + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + } + + public void testUpdateAfterGetDefaultSettings() { + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings( + clusterService.getClusterSettings(), + Settings.EMPTY + ); + Settings settings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode()) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + } + + public void testUpdateAfterGetConfiguredSettings() { + Settings settings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode()) + .build(); + + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings); + + Settings newSettings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), 
AdmissionControlMode.ENFORCED.getMode()) + .build(); + + clusterService.getClusterSettings().applySettings(newSettings); + + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java new file mode 100644 index 0000000000000..af6ec0749e709 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.controllers; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +public class CPUBasedAdmissionControllerTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + CPUBasedAdmissionController admissionController = null; + + String action = "TEST_ACTION"; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testCheckDefaultParameters() { + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + Settings.EMPTY, + clusterService.getClusterSettings() + ); + assertEquals(admissionController.getName(), CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); + assertFalse( + admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode()) + ); + } + + public void testCheckUpdateSettings() { + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + Settings.EMPTY, + clusterService.getClusterSettings() + ); + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(settings); + + assertEquals(admissionController.getName(), CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals(admissionController.getRejectionCount(), 0); + 
assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())); + } + + public void testApplyControllerWithDefaultSettings() { + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + Settings.EMPTY, + clusterService.getClusterSettings() + ); + assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); + action = "indices:data/write/bulk[s][p]"; + admissionController.apply(action); + assertEquals(admissionController.getRejectionCount(), 0); + } + + public void testApplyControllerWhenSettingsEnabled() { + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + settings, + clusterService.getClusterSettings() + ); + assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())); + assertEquals(admissionController.getRejectionCount(), 0); + action = "indices:data/write/bulk[s][p]"; + admissionController.apply(action); + assertEquals(admissionController.getRejectionCount(), 1); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java new file mode 100644 index 0000000000000..98c0f3c7cf24c --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.enums; + +import org.opensearch.test.OpenSearchTestCase; + +public class AdmissionControlModeTests extends OpenSearchTestCase { + + public void testValidActionType() { + assertEquals(AdmissionControlMode.DISABLED.getMode(), "disabled"); + assertEquals(AdmissionControlMode.ENFORCED.getMode(), "enforced"); + assertEquals(AdmissionControlMode.MONITOR.getMode(), "monitor_only"); + assertEquals(AdmissionControlMode.fromName("disabled"), AdmissionControlMode.DISABLED); + assertEquals(AdmissionControlMode.fromName("enforced"), AdmissionControlMode.ENFORCED); + assertEquals(AdmissionControlMode.fromName("monitor_only"), AdmissionControlMode.MONITOR); + } + + public void testInValidActionType() { + String name = "TEST"; + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> AdmissionControlMode.fromName(name)); + assertEquals(ex.getMessage(), "Invalid AdmissionControlMode: " + name); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java new file mode 100644 index 0000000000000..02f582c26f54e --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.enums; + +import org.opensearch.test.OpenSearchTestCase; + +public class TransportActionTypeTests extends OpenSearchTestCase { + + public void testValidActionType() { + assertEquals(TransportActionType.SEARCH.getType(), "search"); + assertEquals(TransportActionType.INDEXING.getType(), "indexing"); + assertEquals(TransportActionType.fromName("search"), TransportActionType.SEARCH); + assertEquals(TransportActionType.fromName("indexing"), TransportActionType.INDEXING); + } + + public void testInValidActionType() { + String name = "test"; + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> TransportActionType.fromName(name)); + assertEquals(ex.getMessage(), "Not Supported TransportAction Type: " + name); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java new file mode 100644 index 0000000000000..43103926a69a2 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java @@ -0,0 +1,153 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.settings;
+
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.util.Arrays;
+import java.util.Set;
+
+public class CPUBasedAdmissionControlSettingsTests extends OpenSearchTestCase {
+    private ClusterService clusterService;
+    private ThreadPool threadPool;
+
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        threadPool = new TestThreadPool("admission_controller_settings_test");
+        clusterService = new ClusterService(
+            Settings.EMPTY,
+            new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
+            threadPool
+        );
+    }
+
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+        threadPool.shutdownNow();
+    }
+
+    public void testSettingsExists() {
+        Set<Setting<?>> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS;
+        assertTrue(
+            "All the cpu based admission controller settings should be supported built in settings",
+            settings.containsAll(
+                Arrays.asList(
+                    CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE,
+                    CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT,
+                    CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT
+                )
+            )
+        );
+    }
+
+    public void testDefaultSettings() {
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            Settings.EMPTY
+        );
+        long percent = 95;
+        assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED);
+        assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), percent);
+        assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), percent);
+        assertEquals(cpuBasedAdmissionControllerSettings.getTransportActionsList(), Arrays.asList("indexing", "search"));
+    }
+
+    public void testGetConfiguredSettings() {
+        long percent = 95;
+        long indexingPercent = 85;
+        Settings settings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.ENFORCED.getMode()
+            )
+            .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), indexingPercent)
+            .build();
+
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            settings
+        );
+        assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED);
+        assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), percent);
+        assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), indexingPercent);
+    }
+
+    public void testUpdateAfterGetDefaultSettings() {
+        long percent = 95;
+        long searchPercent = 80;
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            Settings.EMPTY
+        );
+        Settings settings = Settings.builder()
+            .put(
CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), percent); + } + + public void testUpdateAfterGetConfiguredSettings() { + long percent = 95; + long indexingPercent = 85; + long searchPercent = 80; + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent) + .build(); + + CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings( + clusterService.getClusterSettings(), + settings + ); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), percent); + + Settings updatedSettings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.MONITOR.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), indexingPercent) + .build(); + clusterService.getClusterSettings().applySettings(updatedSettings); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.MONITOR); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), indexingPercent); + + searchPercent = 70; + + updatedSettings = Settings.builder() + .put(updatedSettings) + .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent) + .build(); + clusterService.getClusterSettings().applySettings(updatedSettings); + + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), indexingPercent); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java new file mode 100644 index 0000000000000..03d4819a94045 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java @@ -0,0 +1,92 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.transport;
+
+import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException;
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
+import org.opensearch.tasks.Task;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.transport.TransportChannel;
+import org.opensearch.transport.TransportRequest;
+import org.opensearch.transport.TransportRequestHandler;
+
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+
+public class AdmissionControlTransportHandlerTests extends OpenSearchTestCase {
+    AdmissionControlTransportHandler<TransportRequest> admissionControlTransportHandler;
+
+    public void testHandlerInvoked() throws Exception {
+        String action = "TEST";
+        InterceptingRequestHandler<TransportRequest> handler = new InterceptingRequestHandler<>(action);
+        admissionControlTransportHandler = new AdmissionControlTransportHandler<>(
+            action,
+            handler,
+            mock(AdmissionControlService.class),
+            false
+        );
+        admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class));
+        assertEquals(1, handler.count);
+    }
+
+    public void testHandlerInvokedRejectedException() throws Exception {
+        String action = "TEST";
+        AdmissionControlService admissionControlService = mock(AdmissionControlService.class);
+        doThrow(new OpenSearchRejectedExecutionException()).when(admissionControlService).applyTransportAdmissionControl(action);
+        InterceptingRequestHandler<TransportRequest> handler = new InterceptingRequestHandler<>(action);
+        admissionControlTransportHandler = new AdmissionControlTransportHandler<>(
+            action,
+            handler,
+            admissionControlService,
+            false
+        );
+        try {
+            admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class));
+        } catch (OpenSearchRejectedExecutionException exception) {
+            assertEquals(0, handler.count);
+            handler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class));
+        }
+        assertEquals(1, handler.count);
+    }
+
+    public void testHandlerInvokedRandomException() throws Exception {
+        String action = "TEST";
+        AdmissionControlService admissionControlService = mock(AdmissionControlService.class);
+        doThrow(new NullPointerException()).when(admissionControlService).applyTransportAdmissionControl(action);
+        InterceptingRequestHandler<TransportRequest> handler = new InterceptingRequestHandler<>(action);
+        admissionControlTransportHandler = new AdmissionControlTransportHandler<>(
+            action,
+            handler,
+            admissionControlService,
+            false
+        );
+        try {
+            admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class));
+        } catch (Exception exception) {
+            assertEquals(0, handler.count);
+            handler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class));
+        }
+        assertEquals(1, handler.count);
+    }
+
+    private class InterceptingRequestHandler<T extends TransportRequest> implements TransportRequestHandler<T> {
+        private final String action;
+        public int count;
+
+        public InterceptingRequestHandler(String action) {
+            this.action = action;
+            this.count = 0;
+        }
+
+        @Override
+        public void messageReceived(T request, TransportChannel channel, Task task) throws Exception {
+            this.count = this.count + 1;
+        }
+    }
+}

From 14d4a6389bd5a7612c14e2e6fcb5a39822af9ee0 Mon Sep 17 00:00:00 2001
From: Ashish
Date: Sat, 21 Oct 2023 18:42:52 +0530
Subject: [PATCH 21/26] [Remote Store] Use time elapsed since last successful
local refresh for refresh lag (#10803) * [Remote Store] Use time elapsed since last successful local refresh for time lag Signed-off-by: Ashish Singh * Incorporate PR review comments Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- ...emoteStoreBackpressureAndResiliencyIT.java | 6 ++- .../remote/RemoteSegmentTransferTracker.java | 53 ++++++++++++------- .../remote/RemoteStorePressureService.java | 1 - .../RemoteSegmentTransferTrackerTests.java | 39 +++++++++----- .../RemoteStorePressureServiceTests.java | 32 ++++++++--- 5 files changed, 86 insertions(+), 45 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java index 98586b60dcc69..f19c9db7874db 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java @@ -56,7 +56,7 @@ public void testWritesRejectedDueToBytesLagBreach() throws Exception { public void testWritesRejectedDueToTimeLagBreach() throws Exception { // Initially indexing happens with doc size of 1KB, then all remote store interactions start failing. Now, the // indexing happens with doc size of 1 byte leading to time lag limit getting exceeded and leading to rejections. - validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 15, "time_lag"); + validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 3, "time_lag"); } private void validateBackpressure( @@ -133,11 +133,13 @@ private RemoteSegmentTransferTracker.Stats stats() { return matches.get(0).getSegmentStats(); } - private void indexDocAndRefresh(BytesReference source, int iterations) { + private void indexDocAndRefresh(BytesReference source, int iterations) throws InterruptedException { for (int i = 0; i < iterations; i++) { client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); refresh(INDEX_NAME); } + Thread.sleep(250); + client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); } /** diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java index 2a703f17aa953..fb65d9ef83be2 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -66,6 +67,12 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long remoteRefreshTimeMs; + /** + * This is the time of first local refresh after the last successful remote refresh. When the remote store is in + * sync with local refresh, this will be reset to -1. + */ + private volatile long remoteRefreshStartTimeMs = -1; + /** * The refresh time(clock) of most recent remote refresh. 
*/ @@ -76,11 +83,6 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long refreshSeqNoLag; - /** - * Keeps the time (ms) lag computed so that we do not compute it for every request. - */ - private volatile long timeMsLag; - /** * Keeps track of the total bytes of segment files which were uploaded to remote store during last successful remote refresh */ @@ -132,14 +134,19 @@ public RemoteSegmentTransferTracker( logger = Loggers.getLogger(getClass(), shardId); // Both the local refresh time and remote refresh time are set with current time to give consistent view of time lag when it arises. long currentClockTimeMs = System.currentTimeMillis(); - long currentTimeMs = System.nanoTime() / 1_000_000L; + long currentTimeMs = currentTimeMsUsingSystemNanos(); localRefreshTimeMs = currentTimeMs; remoteRefreshTimeMs = currentTimeMs; + remoteRefreshStartTimeMs = currentTimeMs; localRefreshClockTimeMs = currentClockTimeMs; remoteRefreshClockTimeMs = currentClockTimeMs; this.directoryFileTransferTracker = directoryFileTransferTracker; } + public static long currentTimeMsUsingSystemNanos() { + return TimeUnit.NANOSECONDS.toMillis(System.nanoTime()); + } + @Override public void incrementTotalUploadsFailed() { super.incrementTotalUploadsFailed(); @@ -180,19 +187,22 @@ public long getLocalRefreshClockTimeMs() { */ public void updateLocalRefreshTimeAndSeqNo() { updateLocalRefreshClockTimeMs(System.currentTimeMillis()); - updateLocalRefreshTimeMs(System.nanoTime() / 1_000_000L); + updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); updateLocalRefreshSeqNo(getLocalRefreshSeqNo() + 1); } // Visible for testing - void updateLocalRefreshTimeMs(long localRefreshTimeMs) { + synchronized void updateLocalRefreshTimeMs(long localRefreshTimeMs) { assert localRefreshTimeMs >= this.localRefreshTimeMs : "newLocalRefreshTimeMs=" + localRefreshTimeMs + " < " + "currentLocalRefreshTimeMs=" + this.localRefreshTimeMs; + boolean isRemoteInSyncBeforeLocalRefresh = this.localRefreshTimeMs == this.remoteRefreshTimeMs; this.localRefreshTimeMs = localRefreshTimeMs; - computeTimeMsLag(); + if (isRemoteInSyncBeforeLocalRefresh) { + this.remoteRefreshStartTimeMs = localRefreshTimeMs; + } } private void updateLocalRefreshClockTimeMs(long localRefreshClockTimeMs) { @@ -221,14 +231,18 @@ long getRemoteRefreshClockTimeMs() { return remoteRefreshClockTimeMs; } - public void updateRemoteRefreshTimeMs(long remoteRefreshTimeMs) { - assert remoteRefreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" - + remoteRefreshTimeMs + public synchronized void updateRemoteRefreshTimeMs(long refreshTimeMs) { + assert refreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" + + refreshTimeMs + " < " + "currentRemoteRefreshTimeMs=" + this.remoteRefreshTimeMs; - this.remoteRefreshTimeMs = remoteRefreshTimeMs; - computeTimeMsLag(); + this.remoteRefreshTimeMs = refreshTimeMs; + // When multiple refreshes have failed, there is a possibility that retry is ongoing while another refresh gets + // triggered. After the segments have been uploaded and before the below code runs, the updateLocalRefreshTimeAndSeqNo + // method is triggered, which will update the local localRefreshTimeMs. Now, the lag would basically become the + // time since the last refresh happened locally. + this.remoteRefreshStartTimeMs = refreshTimeMs == this.localRefreshTimeMs ? 
-1 : this.localRefreshTimeMs; } public void updateRemoteRefreshClockTimeMs(long remoteRefreshClockTimeMs) { @@ -243,12 +257,11 @@ public long getRefreshSeqNoLag() { return refreshSeqNoLag; } - private void computeTimeMsLag() { - timeMsLag = localRefreshTimeMs - remoteRefreshTimeMs; - } - public long getTimeMsLag() { - return timeMsLag; + if (remoteRefreshTimeMs == localRefreshTimeMs) { + return 0; + } + return currentTimeMsUsingSystemNanos() - remoteRefreshStartTimeMs; } public long getBytesLag() { @@ -354,7 +367,7 @@ public RemoteSegmentTransferTracker.Stats stats() { shardId, localRefreshClockTimeMs, remoteRefreshClockTimeMs, - timeMsLag, + getTimeMsLag(), localRefreshSeqNo, remoteRefreshSeqNo, uploadBytesStarted.get(), diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java index 2920b33921869..33cd40f802d43 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java @@ -180,7 +180,6 @@ public boolean validate(RemoteSegmentTransferTracker pressureTracker, ShardId sh return true; } if (pressureTracker.isUploadTimeMovingAverageReady() == false) { - logger.trace("upload time moving average is not ready"); return true; } long timeLag = pressureTracker.getTimeMsLag(); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java index 0bf00f9e48137..c87cdfcc8f1a1 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java @@ -23,6 +23,8 @@ import java.util.HashMap; import java.util.Map; +import static org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; + public class RemoteSegmentTransferTrackerTests extends OpenSearchTestCase { private RemoteStoreStatsTrackerFactory remoteStoreStatsTrackerFactory; private ClusterService clusterService; @@ -92,7 +94,7 @@ public void testUpdateLocalRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000L + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateLocalRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getLocalRefreshTimeMs()); } @@ -103,7 +105,7 @@ public void testUpdateRemoteRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000 + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateRemoteRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getRemoteRefreshTimeMs()); } @@ -133,20 +135,29 @@ public void testComputeSeqNoLagOnUpdate() { assertEquals(localRefreshSeqNo - remoteRefreshSeqNo, transferTracker.getRefreshSeqNoLag()); } - public void testComputeTimeLagOnUpdate() { + public void testComputeTimeLagOnUpdate() throws InterruptedException { transferTracker = new RemoteSegmentTransferTracker( shardId, directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long 
currentLocalRefreshTimeMs = transferTracker.getLocalRefreshTimeMs(); - long currentTimeMs = System.nanoTime() / 1_000_000L; - long localRefreshTimeMs = currentTimeMs + randomIntBetween(100, 500); - long remoteRefreshTimeMs = currentTimeMs + randomIntBetween(50, 99); - transferTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); - assertEquals(localRefreshTimeMs - currentLocalRefreshTimeMs, transferTracker.getTimeMsLag()); - transferTracker.updateRemoteRefreshTimeMs(remoteRefreshTimeMs); - assertEquals(localRefreshTimeMs - remoteRefreshTimeMs, transferTracker.getTimeMsLag()); + + // No lag if there is a remote upload corresponding to a local refresh + assertEquals(0, transferTracker.getTimeMsLag()); + + // Set a local refresh time that is higher than remote refresh time + Thread.sleep(1); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + + // Sleep for 100ms and then the lag should be within 100ms +/- 20ms + Thread.sleep(100); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - 100) <= 20); + + transferTracker.updateRemoteRefreshTimeMs(transferTracker.getLocalRefreshTimeMs()); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + long random = randomIntBetween(50, 200); + Thread.sleep(random); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - random) <= 20); } public void testAddUploadBytesStarted() { @@ -519,7 +530,7 @@ public void testStatsObjectCreation() { transferTracker = constructTracker(); RemoteSegmentTransferTracker.Stats transferTrackerStats = transferTracker.stats(); assertEquals(transferTracker.getShardId(), transferTrackerStats.shardId); - assertEquals(transferTracker.getTimeMsLag(), (int) transferTrackerStats.refreshTimeLagMs); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - transferTrackerStats.refreshTimeLagMs) <= 20); assertEquals(transferTracker.getLocalRefreshSeqNo(), (int) transferTrackerStats.localRefreshNumber); assertEquals(transferTracker.getRemoteRefreshSeqNo(), (int) transferTrackerStats.remoteRefreshNumber); assertEquals(transferTracker.getBytesLag(), (int) transferTrackerStats.bytesLag); @@ -591,9 +602,9 @@ private RemoteSegmentTransferTracker constructTracker() { ); transferTracker.incrementTotalUploadsStarted(); transferTracker.incrementTotalUploadsFailed(); - transferTracker.updateUploadTimeMovingAverage(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateUploadTimeMovingAverage(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.updateUploadBytesMovingAverage(99); - transferTracker.updateRemoteRefreshTimeMs(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateRemoteRefreshTimeMs(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.incrementRejectionCount(); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesStarted(10); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesSucceeded(10, System.currentTimeMillis()); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java index de610083f3327..cb77174e612fd 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java @@ -21,8 +21,11 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import 
java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.IntStream;
 
+import static org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos;
 import static org.opensearch.index.remote.RemoteStoreTestsHelper.createIndexShard;
 
 public class RemoteStorePressureServiceTests extends OpenSearchTestCase {
@@ -68,7 +71,7 @@ public void testIsSegmentsUploadBackpressureEnabled() {
         assertTrue(pressureService.isSegmentsUploadBackpressureEnabled());
     }
 
-    public void testValidateSegmentUploadLag() {
+    public void testValidateSegmentUploadLag() throws InterruptedException {
         // Create the pressure tracker
         IndexShard indexShard = createIndexShard(shardId, true);
         remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, Settings.EMPTY);
@@ -86,14 +89,27 @@ public void testValidateSegmentUploadLag() {
             sum.addAndGet(i);
         });
         double avg = (double) sum.get() / 20;
-        long currentMs = System.nanoTime() / 1_000_000;
-        pressureTracker.updateLocalRefreshTimeMs((long) (currentMs + 12 * avg));
-        pressureTracker.updateRemoteRefreshTimeMs(currentMs);
-        Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId));
-        assertTrue(e.getMessage().contains("due to remote segments lagging behind local segments"));
-        assertTrue(e.getMessage().contains("time_lag:114 ms dynamic_time_lag_threshold:95.0 ms"));
 
-        pressureTracker.updateRemoteRefreshTimeMs((long) (currentMs + 2 * avg));
+        // We run this to ensure that the local and remote refresh time are not same anymore
+        while (pressureTracker.getLocalRefreshTimeMs() == currentTimeMsUsingSystemNanos()) {
+            Thread.sleep(10);
+        }
+        long localRefreshTimeMs = currentTimeMsUsingSystemNanos();
+        pressureTracker.updateLocalRefreshTimeMs(localRefreshTimeMs);
+
+        while (currentTimeMsUsingSystemNanos() - localRefreshTimeMs <= 20 * avg) {
+            Thread.sleep((long) (4 * avg));
+        }
+        Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId));
+        String regex = "^rejected execution on primary shard:\\[index]\\[0] due to remote segments lagging behind "
+            + "local segments.time_lag:[0-9]{2,3} ms dynamic_time_lag_threshold:95\\.0 ms$";
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(e.getMessage());
+        assertTrue(matcher.matches());
+
+        pressureTracker.updateRemoteRefreshTimeMs(pressureTracker.getLocalRefreshTimeMs());
+        pressureTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos());
+        Thread.sleep((long) (2 * avg));
         pressureService.validateSegmentsUploadLag(shardId);
 
         // 2. bytes lag more than dynamic threshold
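
The patch above replaces a stored lag value (local refresh time minus remote refresh time, frozen until the next update) with a lag measured live against the clock, anchored at the first local refresh that remote upload has not yet caught up with. The following is a minimal standalone sketch of the new semantics; it is illustrative only and not part of the patch, and its simplified names mirror but do not reproduce the tracker's API.

    // Condensed analogue of RemoteSegmentTransferTracker's time-lag bookkeeping.
    public final class RefreshLagSketch {
        private long localRefreshTimeMs;
        private long remoteRefreshTimeMs;
        // First local refresh after the last successful remote refresh; -1 while in sync.
        private long remoteRefreshStartTimeMs = -1;

        public synchronized void onLocalRefresh(long nowMs) {
            boolean remoteWasInSync = localRefreshTimeMs == remoteRefreshTimeMs;
            localRefreshTimeMs = nowMs;
            if (remoteWasInSync) {
                remoteRefreshStartTimeMs = nowMs; // the lag clock starts here
            }
        }

        public synchronized void onRemoteRefresh(long refreshTimeMs) {
            remoteRefreshTimeMs = refreshTimeMs;
            // Caught up: reset the anchor; otherwise keep measuring from the pending local refresh.
            remoteRefreshStartTimeMs = refreshTimeMs == localRefreshTimeMs ? -1 : localRefreshTimeMs;
        }

        public long timeMsLag(long nowMs) {
            if (remoteRefreshTimeMs == localRefreshTimeMs) {
                return 0; // remote store is in sync with the latest local refresh
            }
            return nowMs - remoteRefreshStartTimeMs; // grows in real time while uploads fail
        }
    }

Because the lag now grows on its own while uploads keep failing, the integration test above needs only 3 failing iterations plus a short sleep to breach the time-lag threshold, where the old stored-difference computation needed 15.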
From 911afc4002b3a9faa86e109871332af0caaa73d0 Mon Sep 17 00:00:00 2001
From: Dhwanil Patel
Date: Sun, 22 Oct 2023 11:31:31 +0530
Subject: [PATCH 22/26] Remove remote store attributes from DiscoveryNode
 toString() (#10810)

Signed-off-by: Dhwanil Patel
---
 .../cluster/node/DiscoveryNode.java           |  8 +++++++-
 .../cluster/node/DiscoveryNodeTests.java      | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java
index 4e49b25eb5789..0c58aabf95207 100644
--- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java
+++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java
@@ -553,7 +553,13 @@ public String toString() {
             sb.append('}');
         }
         if (!attributes.isEmpty()) {
-            sb.append(attributes);
+            sb.append(
+                attributes.entrySet()
+                    .stream()
+                    .filter(entry -> !entry.getKey().startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)) // filter remote_store attributes
+                                                                                                         // from logging to reduce noise.
+                    .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
+            );
         }
         return sb.toString();
     }
diff --git a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java
index 8b61e8f6d724d..c8a6fc76ce820 100644
--- a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java
+++ b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java
@@ -38,13 +38,16 @@
 import org.opensearch.common.settings.Settings;
 import org.opensearch.core.common.io.stream.StreamInput;
 import org.opensearch.core.common.transport.TransportAddress;
+import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
 import org.opensearch.test.NodeRoles;
 import org.opensearch.test.OpenSearchTestCase;
 
 import java.net.InetAddress;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Locale;
+import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -81,6 +84,22 @@ public void testRolesAreSorted() {
 
     }
 
+    public void testRemoteStoreRedactionInToString() {
+        final Set<DiscoveryNodeRole> roles = new HashSet<>(randomSubsetOf(DiscoveryNodeRole.BUILT_IN_ROLES));
+        Map<String, String> attributes = new HashMap<>();
+        attributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo");
+        attributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo");
+        final DiscoveryNode node = new DiscoveryNode(
+            "name",
+            "id",
+            new TransportAddress(TransportAddress.META_ADDRESS, 9200),
+            attributes,
+            roles,
+            Version.CURRENT
+        );
+        assertFalse(node.toString().contains(RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX));
+    }
+
     public void testDiscoveryNodeIsCreatedWithHostFromInetAddress() throws Exception {
         InetAddress inetAddress = randomBoolean()
             ? InetAddress.getByName("192.0.2.1")
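
The redaction in this patch is a plain prefix filter over the node's attribute map; the attributes stay on the DiscoveryNode and on the wire, and only their rendering in toString() is suppressed. Below is a hedged sketch of the effect, assuming the constant REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX resolves to the string "remote_store"; the class and method names are illustrative, not OpenSearch API.

    import java.util.Map;
    import java.util.stream.Collectors;

    public final class AttributeRedactionSketch {
        static Map<String, String> printableAttributes(Map<String, String> attributes) {
            return attributes.entrySet()
                .stream()
                .filter(entry -> !entry.getKey().startsWith("remote_store")) // assumed prefix value
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        }

        public static void main(String[] args) {
            Map<String, String> attrs = Map.of("zone", "us-east-1a", "remote_store.segment.repository", "segment-repo");
            System.out.println(printableAttributes(attrs)); // prints {zone=us-east-1a}
        }
    }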
From 79c6e322f584e72b48793658f6a3456d1eea1fc2 Mon Sep 17 00:00:00 2001
From: Varun Bansal
Date: Sun, 22 Oct 2023 11:54:10 +0530
Subject: [PATCH 23/26] Adds util to compare Metadata for remote state (#10661)

* Adds util to compare Metadata for remote state during restore flow when
identifying if two clusterUUIDs have the same cluster state

Signed-off-by: bansvaru
---
 .../opensearch/cluster/metadata/Metadata.java |  17 +-
 .../remote/RemoteClusterStateService.java     |  12 +-
 .../cluster/metadata/MetadataTests.java       |  33 ++++
 .../RemoteClusterStateServiceTests.java       | 163 ++++++++++++------
 4 files changed, 168 insertions(+), 57 deletions(-)
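
As the diffs below show, the equality check is split in two: identity-level fields (cluster UUID, the committed flag, coordination metadata, hashes of consistent settings) stay in isGlobalStateEquals, while the resource-level entities persisted to the remote store (persistent settings, templates, and other saveable metadata) move into the new isGlobalResourcesMetadataEquals. The restore flow needs the second form, since two chained cluster UUIDs can carry the same state even though their identity fields necessarily differ. A simplified standalone analogue of the split follows; the record and its field set are invented for illustration and do not match the real Metadata class.

    import java.util.Map;
    import java.util.Objects;

    record StateSketch(String clusterUUID, boolean committed, Map<String, String> persistentSettings, Map<String, String> templates) {

        // Identity plus resources: the same incarnation of the cluster with the same content.
        static boolean isGlobalStateEquals(StateSketch a, StateSketch b) {
            return Objects.equals(a.clusterUUID(), b.clusterUUID())
                && a.committed() == b.committed()
                && isGlobalResourcesMetadataEquals(a, b);
        }

        // Resources only: usable across two different cluster UUIDs during restore.
        static boolean isGlobalResourcesMetadataEquals(StateSketch a, StateSketch b) {
            return Objects.equals(a.persistentSettings(), b.persistentSettings())
                && Objects.equals(a.templates(), b.templates());
        }
    }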
diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java
index 626903877b0c6..70c1d059a1b9e 100644
--- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java
+++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java
@@ -922,19 +922,26 @@ public static boolean isGlobalStateEquals(Metadata metadata1, Metadata metadata2
         if (!metadata1.coordinationMetadata.equals(metadata2.coordinationMetadata)) {
             return false;
         }
-        if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) {
+        if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) {
             return false;
         }
-        if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) {
+        if (!metadata1.clusterUUID.equals(metadata2.clusterUUID)) {
             return false;
         }
-        if (!metadata1.templates.equals(metadata2.templates())) {
+        if (metadata1.clusterUUIDCommitted != metadata2.clusterUUIDCommitted) {
             return false;
         }
-        if (!metadata1.clusterUUID.equals(metadata2.clusterUUID)) {
+        return isGlobalResourcesMetadataEquals(metadata1, metadata2);
+    }
+
+    /**
+     * Compares Metadata entities persisted in Remote Store.
+     */
+    public static boolean isGlobalResourcesMetadataEquals(Metadata metadata1, Metadata metadata2) {
+        if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) {
             return false;
         }
-        if (metadata1.clusterUUIDCommitted != metadata2.clusterUUIDCommitted) {
+        if (!metadata1.templates.equals(metadata2.templates())) {
             return false;
         }
         // Check if any persistent metadata needs to be saved
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
index 96ce2fc779ea0..13fbda77d8a2d 100644
--- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
+++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
@@ -689,10 +689,11 @@ private Map<String, IndexMetadata> getIndexMetadataMap(
      * @return {@link IndexMetadata}
      */
     private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, UploadedIndexMetadata uploadedIndexMetadata) {
+        BlobContainer blobContainer = indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID());
         try {
             String[] splitPath = uploadedIndexMetadata.getUploadedFilename().split("/");
             return INDEX_METADATA_FORMAT.read(
-                indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID()),
+                blobContainer,
                 splitPath[splitPath.length - 1],
                 blobStoreRepository.getNamedXContentRegistry()
             );
@@ -882,7 +883,8 @@ private Map<String, ClusterMetadataManifest> trimClusterUUIDs(
                 }
             } else {
                 ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID());
-                if (isMetadataEqual(currentManifest, previousManifest, clusterName)) {
+                if (isMetadataEqual(currentManifest, previousManifest, clusterName)
+                    && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) {
                     trimmedUUIDs.remove(clusterUUID);
                 }
             }
@@ -912,6 +914,12 @@ private boolean isMetadataEqual(ClusterMetadataManifest first, ClusterMetadataMa
         return true;
     }
 
+    private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMetadataManifest second, String clusterName) {
+        Metadata secondGlobalMetadata = getGlobalMetadata(clusterName, second.getClusterUUID(), second);
+        Metadata firstGlobalMetadata = getGlobalMetadata(clusterName, first.getClusterUUID(), first);
+        return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata);
+    }
+
     private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) {
         return !manifest.isClusterUUIDCommitted();
     }
diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java
index 40eefa6cdbf03..618fcb923bc60 100644
--- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java
+++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java
@@ -627,6 +627,39 @@ public void testGlobalStateEqualsCoordinationMetadata() {
         assertFalse(Metadata.isGlobalStateEquals(metadata1, metadata2));
     }
 
+    public void testGlobalResourcesStateEqualsCoordinationMetadata() {
+        CoordinationMetadata coordinationMetadata1 = new CoordinationMetadata(
+            randomNonNegativeLong(),
+            randomVotingConfig(),
+            randomVotingConfig(),
+            randomVotingConfigExclusions()
+        );
+        Metadata metadata1 = Metadata.builder()
+            .coordinationMetadata(coordinationMetadata1)
+            .clusterUUID(randomAlphaOfLength(10))
+            .clusterUUIDCommitted(false)
+            .hashesOfConsistentSettings(Map.of("a", "b"))
.persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + CoordinationMetadata coordinationMetadata2 = new CoordinationMetadata( + randomNonNegativeLong(), + randomVotingConfig(), + randomVotingConfig(), + randomVotingConfigExclusions() + ); + Metadata metadata2 = Metadata.builder() + .coordinationMetadata(coordinationMetadata2) + .clusterUUIDCommitted(true) + .clusterUUID(randomAlphaOfLength(11)) + .hashesOfConsistentSettings(Map.of("b", "a")) + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + + assertTrue(Metadata.isGlobalStateEquals(metadata1, metadata1)); + assertFalse(Metadata.isGlobalStateEquals(metadata1, metadata2)); + assertTrue(Metadata.isGlobalResourcesMetadataEquals(metadata1, metadata2)); + } + public void testSerializationWithIndexGraveyard() throws IOException { final IndexGraveyard graveyard = IndexGraveyardTests.createRandom(); final Metadata originalMeta = Metadata.builder().indexGraveyard(graveyard).build(); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 5202f31c514ed..55094400581b4 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -9,6 +9,7 @@ package org.opensearch.gateway.remote; import org.opensearch.Version; +import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.CoordinationMetadata; @@ -27,6 +28,7 @@ import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.network.NetworkModule; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.ParseField; @@ -37,6 +39,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.indices.IndicesModule; import org.opensearch.repositories.FilterRepository; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryMissingException; @@ -65,11 +68,14 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Stream; import org.mockito.ArgumentCaptor; import org.mockito.ArgumentMatchers; +import static java.util.stream.Collectors.toList; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; @@ -126,11 +132,19 @@ public void setup() { .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) .build(); + NamedXContentRegistry xContentRegistry = new NamedXContentRegistry( + Stream.of( + NetworkModule.getNamedXContents().stream(), + 
IndicesModule.getNamedXContents().stream(),
+                ClusterModule.getNamedXWriteables().stream()
+            ).flatMap(Function.identity()).collect(toList())
+        );
+
         blobStoreRepository = mock(BlobStoreRepository.class);
         blobStore = mock(BlobStore.class);
         when(blobStoreRepository.blobStore()).thenReturn(blobStore);
         when(repositoriesService.repository("remote_store_repository")).thenReturn(blobStoreRepository);
-        when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry(new ArrayList<>()));
+        when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(xContentRegistry);
         remoteClusterStateService = new RemoteClusterStateService(
             "test-node-id",
             repositoriesServiceSupplier,
@@ -887,7 +901,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti
             "cluster-uuid3",
             "cluster-uuid1"
         );
-        mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers);
+        mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean());
 
         remoteClusterStateService.start();
         String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster");
@@ -1040,6 +1054,11 @@ public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Excepti
     }
 
     private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException {
+        mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false);
+    }
+
+    private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers, boolean differGlobalMetadata)
+        throws IOException {
         final BlobPath blobPath = mock(BlobPath.class);
         when((blobStoreRepository.basePath())).thenReturn(blobPath);
         when(blobPath.add(anyString())).thenReturn(blobPath);
@@ -1061,7 +1080,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
             "cluster-uuid1",
             clusterUUIDsPointers.get("cluster-uuid1"),
             randomAlphaOfLength(10),
-            uploadedIndexMetadataList1
+            uploadedIndexMetadataList1,
+            "test-metadata1"
         );
         Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexMetadata indexMetadata1 = IndexMetadata.builder("index1")
@@ -1074,8 +1094,12 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
             .numberOfShards(1)
             .numberOfReplicas(1)
             .build();
+        Metadata metadata1 = Metadata.builder()
+            .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build())
+            .build();
         Map<String, IndexMetadata> indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2);
-        mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1);
+        mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1);
+        mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V1);
 
         List<UploadedIndexMetadata> uploadedIndexMetadataList2 = List.of(
             new UploadedIndexMetadata("index1", "index-uuid1", "key1"),
             new UploadedIndexMetadata("index2", "index-uuid2", "key2")
         );
         final ClusterMetadataManifest clusterManifest2 = generateClusterMetadataManifest(
             "cluster-uuid2",
             clusterUUIDsPointers.get("cluster-uuid2"),
             randomAlphaOfLength(10),
-            uploadedIndexMetadataList2
+            uploadedIndexMetadataList2,
+            "test-metadata2"
         );
         IndexMetadata indexMetadata3 = IndexMetadata.builder("index1")
             .settings(indexSettings)
             .numberOfShards(1)
             .numberOfReplicas(1)
             .build();
+        Metadata metadata2 = Metadata.builder()
+            .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build())
+            .build();
         Map<String, IndexMetadata> indexMetadataMap2 = Map.of("index-uuid1", indexMetadata3, "index-uuid2", indexMetadata4);
Map.of("index-uuid1", indexMetadata3, "index-uuid2", indexMetadata4); - mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2); + mockBlobContainerForGlobalMetadata(blobContainer2, clusterManifest2, metadata2); + mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V1); + + // differGlobalMetadata controls which one of IndexMetadata or Metadata object would be different + // when comparing cluster-uuid3 and cluster-uuid1 state. + // if set true, only Metadata will differ b/w cluster uuid1 and cluster uuid3. + // If set to false, only IndexMetadata would be different + // Adding difference in EXACTLY on of these randomly will help us test if our uuid trimming logic compares both + // IndexMetadata and Metadata when deciding if the remote state b/w two different cluster uuids is same. + List uploadedIndexMetadataList3 = differGlobalMetadata + ? new ArrayList<>(uploadedIndexMetadataList1) + : List.of(new UploadedIndexMetadata("index1", "index-uuid1", "key1")); + IndexMetadata indexMetadata5 = IndexMetadata.builder("index1") + .settings(indexSettings) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); + Map indexMetadataMap3 = differGlobalMetadata + ? new HashMap<>(indexMetadataMap1) + : Map.of("index-uuid1", indexMetadata5); + Metadata metadata3 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), !differGlobalMetadata).build()) + .build(); - List uploadedIndexMetadataList3 = List.of(new UploadedIndexMetadata("index1", "index-uuid1", "key1")); final ClusterMetadataManifest clusterManifest3 = generateClusterMetadataManifest( "cluster-uuid3", clusterUUIDsPointers.get("cluster-uuid3"), randomAlphaOfLength(10), - uploadedIndexMetadataList3 + uploadedIndexMetadataList3, + "test-metadata3" ); - IndexMetadata indexMetadata5 = IndexMetadata.builder("index1") - .settings(indexSettings) - .numberOfShards(1) - .numberOfReplicas(1) - .build(); - Map indexMetadataMap3 = Map.of("index-uuid1", indexMetadata5); - mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3); - - when(blobStore.blobContainer(ArgumentMatchers.any())).thenReturn( - uuidBlobContainer, - blobContainer1, - blobContainer1, - blobContainer3, - blobContainer3, - blobContainer2, - blobContainer2, - blobContainer1, - blobContainer2, - blobContainer1, - blobContainer2 + mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); + mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1); + + ArrayList mockBlobContainerOrderedList = new ArrayList<>( + List.of(blobContainer1, blobContainer1, blobContainer3, blobContainer3, blobContainer2, blobContainer2) ); + + if (differGlobalMetadata) { + mockBlobContainerOrderedList.addAll( + List.of(blobContainer3, blobContainer1, blobContainer3, blobContainer1, blobContainer1, blobContainer3) + ); + } + mockBlobContainerOrderedList.addAll( + List.of(blobContainer2, blobContainer1, blobContainer2, blobContainer1, blobContainer1, blobContainer2) + ); + BlobContainer[] mockBlobContainerOrderedArray = new BlobContainer[mockBlobContainerOrderedList.size()]; + mockBlobContainerOrderedList.toArray(mockBlobContainerOrderedArray); + when(blobStore.blobContainer(ArgumentMatchers.any())).thenReturn(uuidBlobContainer, mockBlobContainerOrderedArray); when(blobStoreRepository.getCompressor()).thenReturn(new DeflateCompressor()); } @@ -1135,7 +1182,8 @@ private ClusterMetadataManifest 
         String clusterUUID,
         String previousClusterUUID,
         String stateUUID,
-        List<UploadedIndexMetadata> uploadedIndexMetadata
+        List<UploadedIndexMetadata> uploadedIndexMetadata,
+        String globalMetadataFileName
     ) {
         return ClusterMetadataManifest.builder()
             .indices(uploadedIndexMetadata)
@@ -1148,7 +1196,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest(
             .previousClusterUUID(previousClusterUUID)
             .committed(true)
             .clusterUUIDCommitted(true)
-            .globalMetadataFileName("test-global-metadata")
+            .globalMetadataFileName(globalMetadataFileName)
+            .codecVersion(ClusterMetadataManifest.CODEC_V1)
             .build();
     }
 
@@ -1181,17 +1230,29 @@ private void mockBlobContainer(
         ClusterMetadataManifest clusterMetadataManifest,
         Map<String, IndexMetadata> indexMetadataMap
     ) throws IOException {
-        BlobMetadata blobMetadata = new PlainBlobMetadata("manifestFileName", 1);
+        mockBlobContainer(blobContainer, clusterMetadataManifest, indexMetadataMap, ClusterMetadataManifest.CODEC_V0);
+    }
+
+    private void mockBlobContainer(
+        BlobContainer blobContainer,
+        ClusterMetadataManifest clusterMetadataManifest,
+        Map<String, IndexMetadata> indexMetadataMap,
+        int codecVersion
+    ) throws IOException {
+        String manifestFileName = codecVersion >= ClusterMetadataManifest.CODEC_V1
+            ? "manifest__manifestFileName__abcd__abcd__abcd__1"
+            : "manifestFileName";
+        BlobMetadata blobMetadata = new PlainBlobMetadata(manifestFileName, 1);
         when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC))
             .thenReturn(Arrays.asList(blobMetadata));
 
         BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize(
             clusterMetadataManifest,
-            "manifestFileName",
+            manifestFileName,
             blobStoreRepository.getCompressor(),
             FORMAT_PARAMS
         );
-        when(blobContainer.readBlob("manifestFileName")).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes()));
+        when(blobContainer.readBlob(manifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes()));
 
         clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> {
             try {
@@ -1200,15 +1261,15 @@ private void mockBlobContainer(
                     return;
                 }
                 String fileName = uploadedIndexMetadata.getUploadedFilename();
-                BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize(
-                    indexMetadata,
-                    fileName,
-                    blobStoreRepository.getCompressor(),
-                    FORMAT_PARAMS
-                );
-                when(blobContainer.readBlob(fileName + ".dat")).thenReturn(
-                    new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes())
-                );
+                when(blobContainer.readBlob(fileName + ".dat")).thenAnswer((invocationOnMock) -> {
+                    BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize(
+                        indexMetadata,
+                        fileName,
+                        blobStoreRepository.getCompressor(),
+                        FORMAT_PARAMS
+                    );
+                    return new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes());
+                });
             } catch (IOException e) {
                 throw new RuntimeException(e);
             }
@@ -1238,15 +1299,17 @@ private void mockBlobContainerForGlobalMetadata(
         );
         when(blobContainer.readBlob(mockManifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes()));
 
-        BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize(
-            metadata,
-            "global-metadata-file",
-            blobStoreRepository.getCompressor(),
-            FORMAT_PARAMS
-        );
         String[] splitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/");
-        when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenReturn(
1]))).thenReturn( - new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()) + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenAnswer( + (invocationOnMock) -> { + BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( + metadata, + "global-metadata-file", + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + return new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()); + } ); } From e618b9bd27af47c4b1a04f4eaabc323a39573c40 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Sun, 22 Oct 2023 13:28:50 +0530 Subject: [PATCH 24/26] Fix custom metadata not getting stored for remote store not supporting async write (#10812) Signed-off-by: Dhwanil Patel --- .../RemoteStoreClusterStateRestoreIT.java | 28 +++++++++++-------- .../remote/RemoteClusterStateService.java | 8 +++++- .../blobstore/ChecksumBlobStoreFormat.java | 25 +++++++++++++++-- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index c2cb7cc60f152..f0863966fa222 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -233,8 +234,7 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { String prevClusterUUID = clusterService().state().metadata().clusterUUID(); // Create global metadata - register a custom repo - // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 - // registerCustomRepository(); + Path repoPath = registerCustomRepository(); // Create global metadata - persistent settings updatePersistentSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 34).build()); @@ -263,30 +263,36 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); // validate global metadata restored - verifyRestoredRepositories(); + verifyRestoredRepositories(repoPath); verifyRestoredIndexTemplate(); } - private void registerCustomRepository() { + private Path registerCustomRepository() { + Path path = randomRepoPath(); assertAcked( client().admin() .cluster() .preparePutRepository("custom-repo") .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)) + .setSettings(Settings.builder().put("location", path).put("compress", false)) .get() ); + return path; } - private void verifyRestoredRepositories() { + private void verifyRestoredRepositories(Path repoPath) { RepositoriesMetadata repositoriesMetadata = clusterService().state().metadata().custom(RepositoriesMetadata.TYPE); - assertEquals(2, repositoriesMetadata.repositories().size()); // includes remote store repo as well + assertEquals(3, repositoriesMetadata.repositories().size()); // includes remote store repo as well assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_NAME).settings())); 
         assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_2_NAME).settings()));
-        // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691
-        // assertEquals("fs", repositoriesMetadata.repository("custom-repo").type());
-        // assertEquals(Settings.builder().put("location", randomRepoPath()).put("compress", false).build(),
-        // repositoriesMetadata.repository("custom-repo").settings());
+        assertEquals("fs", repositoriesMetadata.repository("custom-repo").type());
+        assertEquals(
+            Settings.builder().put("location", repoPath).put("compress", false).build(),
+            repositoriesMetadata.repository("custom-repo").settings()
+        );
+
+        // repo cleanup post verification
+        clusterAdmin().prepareDeleteRepository("custom-repo").get();
     }

     private void addClusterLevelReadOnlyBlock() throws InterruptedException, ExecutionException {
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
index 13fbda77d8a2d..dfe3659c1efd1 100644
--- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
+++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java
@@ -568,7 +568,13 @@ private ClusterMetadataManifest uploadManifest(
     private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName)
         throws IOException {
         final BlobContainer metadataManifestContainer = manifestContainer(clusterName, clusterUUID);
-        CLUSTER_METADATA_MANIFEST_FORMAT.write(uploadManifest, metadataManifestContainer, fileName, blobStoreRepository.getCompressor());
+        CLUSTER_METADATA_MANIFEST_FORMAT.write(
+            uploadManifest,
+            metadataManifestContainer,
+            fileName,
+            blobStoreRepository.getCompressor(),
+            FORMAT_PARAMS
+        );
     }

     private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) {
diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java
index 17cb68f798094..e280141c12bc1 100644
--- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java
+++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java
@@ -170,8 +170,29 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr
      * @param compressor whether to use compression
      */
     public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException {
+        write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS);
+    }
+
+    /**
+     * Writes blob with resolving the blob name using {@link #blobName} method.
+     * <p>
+     * The blob will optionally be compressed.
+     *
+     * @param obj           object to be serialized
+     * @param blobContainer blob container
+     * @param name          blob name
+     * @param compressor    whether to use compression
+     * @param params        ToXContent params
+     */
+    public void write(
+        final T obj,
+        final BlobContainer blobContainer,
+        final String name,
+        final Compressor compressor,
+        final ToXContent.Params params
+    ) throws IOException {
         final String blobName = blobName(name);
-        final BytesReference bytes = serialize(obj, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS);
+        final BytesReference bytes = serialize(obj, blobName, compressor, params);
         blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false);
     }

@@ -195,7 +216,7 @@ public void writeAsync(
         final ToXContent.Params params
     ) throws IOException {
         if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) {
-            write(obj, blobContainer, name, compressor);
+            write(obj, blobContainer, name, compressor, params);
             listener.onResponse(null);
             return;
         }

From a09047a4a9870eca642577094c9793256cbe71d1 Mon Sep 17 00:00:00 2001
From: Shivansh Arora <31575408+shiv0408@users.noreply.github.com>
Date: Sun, 22 Oct 2023 13:41:50 +0530
Subject: [PATCH 25/26] Removed unnecessary catch statement (#10783)

* Removed unnecessary catch statement related to repo missing exception in remote state flow

Signed-off-by: Shivansh Arora
---
 .../main/java/org/opensearch/gateway/GatewayMetaState.java | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java
index 9eb7fb0ca04d0..5d417ce78fe5c 100644
--- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java
+++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java
@@ -67,7 +67,6 @@
 import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult;
 import org.opensearch.node.Node;
 import org.opensearch.plugins.MetadataUpgrader;
-import org.opensearch.repositories.RepositoryMissingException;
 import org.opensearch.threadpool.ThreadPool;
 import org.opensearch.transport.TransportService;

@@ -712,12 +711,6 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) ==
             assert verifyManifestAndClusterState(manifest, clusterState) == true : "Manifest and ClusterState are not in sync";
             lastAcceptedManifest = manifest;
             lastAcceptedState = clusterState;
-        } catch (RepositoryMissingException e) {
-            // TODO This logic needs to be modified once PR for repo registration during bootstrap is pushed
-            // https://github.com/opensearch-project/OpenSearch/pull/9105/
-            // After the above PR is pushed, we can remove this silent failure and throw the exception instead.
-            logger.error("Remote repository is not yet registered");
-            lastAcceptedState = clusterState;
         } catch (Exception e) {
             handleExceptionOnWrite(e);
         }

From 9b7a9d0026aa379537804fd24b95619abe88e1c0 Mon Sep 17 00:00:00 2001
From: Varun Bansal
Date: Sun, 22 Oct 2023 16:16:10 +0530
Subject: [PATCH 26/26] Override local disk state if we are able to restore from remote (#10748)

* Override local disk state if we are able to restore from remote

Signed-off-by: bansvaru
---
 .../RemoteStoreClusterStateRestoreIT.java     | 68 +++++++++++++
 .../opensearch/gateway/GatewayMetaState.java  |  7 +-
 .../recovery/RemoteStoreRestoreService.java   | 95 +++----------------
 3 files changed, 89 insertions(+), 81 deletions(-)

diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java
index f0863966fa222..29786158bc73c 100644
--- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java
@@ -21,6 +21,7 @@
 import org.opensearch.gateway.remote.ClusterMetadataManifest;
 import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata;
 import org.opensearch.gateway.remote.RemoteClusterStateService;
+import org.opensearch.test.InternalTestCluster;
 import org.opensearch.test.OpenSearchIntegTestCase;

 import java.io.IOException;
@@ -32,6 +33,7 @@
 import java.util.Objects;
 import java.util.concurrent.ExecutionException;

+import static org.opensearch.cluster.coordination.ClusterBootstrapService.INITIAL_CLUSTER_MANAGER_NODES_SETTING;
 import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_READ_ONLY_SETTING;
 import static org.opensearch.cluster.metadata.Metadata.CLUSTER_READ_ONLY_BLOCK;
 import static org.opensearch.cluster.metadata.Metadata.SETTING_READ_ONLY_SETTING;
@@ -94,6 +96,72 @@ public void testFullClusterRestore() throws Exception {
         verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true);
     }

+    /**
+     * This test scenario covers the case where, right after a remote state restore is persisted to disk via LucenePersistedState, the full cluster restarts.
+     * This is a special case for remote state because at this point the cluster uuid in the restored state is still ClusterState.UNKNOWN_UUID, as we persist it to disk.
+     * After restart the local disk state will be read but should again be overridden with remote state.
+     *
+     * 1. Form a cluster and index a few docs
+     * 2. Replace all nodes to remove all local disk state
+     * 3. Start a cluster manager node without correct seeding to ensure local disk state is written with cluster uuid ClusterState.UNKNOWN_UUID but with remote restored Metadata
+     * 4. Restart the cluster manager node with correct seeding.
+     * 5. After restart the cluster manager picks up the local disk state, which has the same Metadata as remote but whose cluster uuid is still ClusterState.UNKNOWN_UUID
+     * 6. The cluster manager will try to restore from remote again.
+     * 7. Metadata loaded from local disk state will be overridden with remote Metadata and no conflict should arise.
+     * 8. Add data nodes to recover index data
+     * 9. Verify Metadata and index data is restored.
+     */
+    public void testFullClusterStateRestore() throws Exception {
+        int shardCount = randomIntBetween(1, 2);
+        int replicaCount = 1;
+        int dataNodeCount = shardCount * (replicaCount + 1);
+        int clusterManagerNodeCount = 1;
+
+        // index some data to generate files in remote directory
+        Map<String, Long> indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1);
+        String prevClusterUUID = clusterService().state().metadata().clusterUUID();
+
+        // stop all nodes
+        internalCluster().stopAllNodes();
+
+        // start a cluster manager node with no cluster manager seeding.
+        // This should fail with IllegalStateException as the cluster manager fails to form without any initial seed
+        assertThrows(
+            IllegalStateException.class,
+            () -> internalCluster().startClusterManagerOnlyNodes(
+                clusterManagerNodeCount,
+                Settings.builder()
+                    .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey()) // disable seeding during bootstrapping
+                    .build()
+            )
+        );
+
+        // verify cluster manager not elected
+        String newClusterUUID = clusterService().state().metadata().clusterUUID();
+        assert Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID)
+            : "Disabling Cluster manager seeding failed. cluster uuid is not unknown";
+
+        // restart cluster manager with correct seed
+        internalCluster().fullRestart(new InternalTestCluster.RestartCallback() {
+            @Override
+            public Settings onNodeStopped(String nodeName) {
+                return Settings.builder()
+                    .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName) // Seed with correct Cluster Manager node
+                    .build();
+            }
+        });
+
+        // validate new cluster state formed
+        newClusterUUID = clusterService().state().metadata().clusterUUID();
+        assert !Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID) : "cluster restart not successful. cluster uuid is still unknown";
+        assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
+        validateMetadata(List.of(INDEX_NAME));
+
+        // start data nodes to trigger index data recovery
+        internalCluster().startDataOnlyNodes(dataNodeCount);
+        verifyRestoredData(indexStats, INDEX_NAME);
+    }
+
     public void testFullClusterRestoreMultipleIndices() throws Exception {
         int shardCount = randomIntBetween(1, 2);
         int replicaCount = 1;
diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java
index 5d417ce78fe5c..f855449c708d2 100644
--- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java
+++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java
@@ -174,7 +174,9 @@ public void start(
                     if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) {
                         // Load state from remote
                         final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore(
-                            clusterState,
+                            // Remote Metadata should always override local disk Metadata
+                            // if local disk Metadata's cluster uuid is UNKNOWN_UUID
+                            ClusterState.builder(clusterState).metadata(Metadata.EMPTY_METADATA).build(),
                             lastKnownClusterUUID,
                             false,
                             new String[] {}
@@ -549,6 +551,9 @@ static class LucenePersistedState implements PersistedState {
            // out by this version of OpenSearch. TODO TBD should we avoid indexing when possible?
            final PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter();
            try {
+                // During remote state restore, there will be non-empty metadata getting persisted with cluster UUID as
+                // ClusterState.UNKNOWN_UUID. The valid UUID will be generated and persisted along with the first cluster state getting
+                // published.
                writer.writeFullStateAndCommit(currentTerm, lastAcceptedState);
            } catch (Exception e) {
                try {
diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java
index 6692d521b8f65..9541d13421e27 100644
--- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java
+++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java
@@ -40,12 +40,10 @@
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;

@@ -146,6 +144,11 @@ public RemoteRestoreResult restore(
             || restoreClusterUUID.isBlank()) == false;
         if (metadataFromRemoteStore) {
             try {
+                // Restore with the current cluster UUID will fail as the same indices would be present in the cluster which we are
+                // trying to restore
+                if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) {
+                    throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID");
+                }
                 remoteMetadata = remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID);
                 remoteMetadata.getIndices().values().forEach(indexMetadata -> {
                     indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata));
@@ -158,12 +161,21 @@ public RemoteRestoreResult restore(
                 IndexMetadata indexMetadata = currentState.metadata().index(indexName);
                 if (indexMetadata == null) {
                     logger.warn("Index restore is not supported for non-existent index. Skipping: {}", indexName);
+                } else if (indexMetadata.getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false) == false) {
+                    logger.warn("Remote store is not enabled for index: {}", indexName);
+                } else if (restoreAllShards && IndexMetadata.State.CLOSE.equals(indexMetadata.getState()) == false) {
+                    throw new IllegalStateException(
+                        String.format(
+                            Locale.ROOT,
+                            "cannot restore index [%s] because an open index with same name/uuid already exists in the cluster.",
+                            indexName
+                        ) + " Close the existing index."
+                    );
                 } else {
                     indexMetadataMap.put(indexName, new Tuple<>(false, indexMetadata));
                 }
             }
         }
-        validate(currentState, indexMetadataMap, restoreClusterUUID, restoreAllShards);
         return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteMetadata);
     }

@@ -272,83 +284,6 @@ private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMe
         repositoriesMetadata.ifPresent(metadata -> mdBuilder.putCustom(RepositoriesMetadata.TYPE, metadata));
     }

-    /**
-     * Performs various validations needed before executing restore
-     * @param currentState current cluster state
-     * @param indexMetadataMap map of index metadata to restore
-     * @param restoreClusterUUID cluster UUID used to restore IndexMetadata
-     * @param restoreAllShards indicates if all shards of the index needs to be restored. This flat is ignored if remoteClusterUUID is provided
-     */
-    private void validate(
-        ClusterState currentState,
-        Map<String, Tuple<Boolean, IndexMetadata>> indexMetadataMap,
-        @Nullable String restoreClusterUUID,
-        boolean restoreAllShards
-    ) throws IllegalStateException, IllegalArgumentException {
-        String errorMsg = "cannot restore index [%s] because an open index with same name/uuid already exists in the cluster.";
-
-        // Restore with current cluster UUID will fail as same indices would be present in the cluster which we are trying to
-        // restore
-        if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) {
-            throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID");
-        }
-        for (Map.Entry<String, Tuple<Boolean, IndexMetadata>> indexMetadataEntry : indexMetadataMap.entrySet()) {
-            String indexName = indexMetadataEntry.getKey();
-            IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2();
-            String indexUUID = indexMetadata.getIndexUUID();
-            boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1();
-            if (indexMetadata.getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false)) {
-                if (metadataFromRemoteStore) {
-                    Set<String> graveyardIndexNames = new HashSet<>();
-                    Set<String> graveyardIndexUUID = new HashSet<>();
-                    Set<String> liveClusterIndexUUIDs = currentState.metadata()
-                        .indices()
-                        .values()
-                        .stream()
-                        .map(IndexMetadata::getIndexUUID)
-                        .collect(Collectors.toSet());
-
-                    currentState.metadata().indexGraveyard().getTombstones().forEach(tombstone -> {
-                        graveyardIndexNames.add(tombstone.getIndex().getName());
-                        graveyardIndexUUID.add(tombstone.getIndex().getUUID());
-                    });
-
-                    // Since updates to graveyard are synced to remote we should neven land in a situation where remote contain index
-                    // metadata for graveyard index.
-                    assert graveyardIndexNames.contains(indexName) == false : String.format(
-                        Locale.ROOT,
-                        "Index name [%s] exists in graveyard!",
-                        indexName
-                    );
-                    assert graveyardIndexUUID.contains(indexUUID) == false : String.format(
-                        Locale.ROOT,
-                        "Index UUID [%s] exists in graveyard!",
-                        indexUUID
-                    );
-
-                    // Any indices being restored from remote cluster state should not already be part of the cluster as this causes
-                    // conflict
-                    boolean sameNameIndexExists = currentState.metadata().hasIndex(indexName);
-                    boolean sameUUIDIndexExists = liveClusterIndexUUIDs.contains(indexUUID);
-                    if (sameNameIndexExists || sameUUIDIndexExists) {
-                        String finalErrorMsg = String.format(Locale.ROOT, errorMsg, indexName);
-                        logger.info(finalErrorMsg);
-                        throw new IllegalStateException(finalErrorMsg);
-                    }
-
-                    boolean isHidden = IndexMetadata.INDEX_HIDDEN_SETTING.get(indexMetadata.getSettings());
-                    createIndexService.validateIndexName(indexName, currentState);
-                    createIndexService.validateDotIndex(indexName, isHidden);
-                    shardLimitValidator.validateShardLimit(indexName, indexMetadata.getSettings(), currentState);
-                } else if (restoreAllShards && IndexMetadata.State.CLOSE.equals(indexMetadata.getState()) == false) {
-                    throw new IllegalStateException(String.format(Locale.ROOT, errorMsg, indexName) + " Close the existing index.");
-                }
-            } else {
-                logger.warn("Remote store is not enabled for index: {}", indexName);
-            }
-        }
-    }
-
     /**
      * Result of a remote restore operation.
      */
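As an aside on the ChecksumBlobStoreFormat change in PATCH 24/26 above: a minimal, hypothetical caller of the new five-argument write() could look like the sketch below. The variables manifest, container, fileName, and compressor are stand-ins rather than code from the patches, and referencing RemoteClusterStateService.FORMAT_PARAMS assumes that constant is visible to the caller; this shows the call shape only, not an implementation from the series.

    // Sketch: synchronous write that threads explicit ToXContent params through to
    // serialize(), instead of the SNAPSHOT_ONLY_FORMAT_PARAMS default used by the
    // four-argument overload. Passing params here is what lets custom metadata be
    // stored even on repositories without AsyncMultiStreamBlobContainer support,
    // since writeAsync() now falls back to this overload.
    ChecksumBlobStoreFormat<ClusterMetadataManifest> format = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT;
    format.write(manifest, container, fileName, compressor, RemoteClusterStateService.FORMAT_PARAMS);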