From 5ba909a982e35172cd8774eabb726b6636d0018d Mon Sep 17 00:00:00 2001 From: Prudhvi Godithi Date: Mon, 9 Dec 2024 15:12:56 -0800 Subject: [PATCH 1/7] Overflow prevention (#16812) Signed-off-by: Prudhvi Godithi --- CHANGELOG.md | 1 + .../org/opensearch/common/time/DateUtils.java | 24 +++ .../index/mapper/DateFieldMapper.java | 4 +- .../common/time/DateUtilsTests.java | 17 ++ .../index/mapper/DateFieldMapperTests.java | 2 - .../index/mapper/DateFieldTypeTests.java | 199 ++++++++++++++++++ 6 files changed, 243 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723ad7f1d80ad..5bab36a15d958 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support prefix list for remote repository attributes([#16271](https://github.com/opensearch-project/OpenSearch/pull/16271)) - Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)). - Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/)) +- Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)). ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) diff --git a/server/src/main/java/org/opensearch/common/time/DateUtils.java b/server/src/main/java/org/opensearch/common/time/DateUtils.java index 7ab395a1117e7..e5a019b58f7da 100644 --- a/server/src/main/java/org/opensearch/common/time/DateUtils.java +++ b/server/src/main/java/org/opensearch/common/time/DateUtils.java @@ -272,6 +272,30 @@ public static Instant clampToNanosRange(Instant instant) { return instant; } + static final Instant INSTANT_LONG_MIN_VALUE = Instant.ofEpochMilli(Long.MIN_VALUE); + static final Instant INSTANT_LONG_MAX_VALUE = Instant.ofEpochMilli(Long.MAX_VALUE); + + /** + * Clamps the given {@link Instant} to the valid epoch millisecond range. + * + * - If the input is before {@code Long.MIN_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MIN_VALUE)}. + * - If the input is after {@code Long.MAX_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MAX_VALUE)}. + * - Otherwise, it returns the input as-is. + * + * @param instant the {@link Instant} to clamp + * @return the clamped {@link Instant} + * @throws NullPointerException if the input is {@code null} + */ + public static Instant clampToMillisRange(Instant instant) { + if (instant.isBefore(INSTANT_LONG_MIN_VALUE)) { + return INSTANT_LONG_MIN_VALUE; + } + if (instant.isAfter(INSTANT_LONG_MAX_VALUE)) { + return INSTANT_LONG_MAX_VALUE; + } + return instant; + } + /** * convert a long value to a java time instant * the long value resembles the nanoseconds since the epoch diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java index 7fbb38c47572c..effee53d7cf63 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java @@ -122,7 +122,7 @@ public enum Resolution { MILLISECONDS(CONTENT_TYPE, NumericType.DATE) { @Override public long convert(Instant instant) { - return instant.toEpochMilli(); + return clampToValidRange(instant).toEpochMilli(); } @Override @@ -132,7 +132,7 @@ public Instant toInstant(long value) { @Override public Instant clampToValidRange(Instant instant) { - return instant; + return DateUtils.clampToMillisRange(instant); } @Override diff --git a/server/src/test/java/org/opensearch/common/time/DateUtilsTests.java b/server/src/test/java/org/opensearch/common/time/DateUtilsTests.java index 98a79f3ca38dc..cb691f2177f6d 100644 --- a/server/src/test/java/org/opensearch/common/time/DateUtilsTests.java +++ b/server/src/test/java/org/opensearch/common/time/DateUtilsTests.java @@ -260,4 +260,21 @@ public void testRoundYear() { long startOf1996 = Year.of(1996).atDay(1).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); assertThat(DateUtils.roundYear(endOf1996), is(startOf1996)); } + + public void testClampToMillisRange() { + Instant normalInstant = Instant.now(); + assertEquals(normalInstant, DateUtils.clampToMillisRange(normalInstant)); + + Instant beforeMinInstant = DateUtils.INSTANT_LONG_MIN_VALUE.minusMillis(1); + assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(beforeMinInstant)); + + Instant afterMaxInstant = DateUtils.INSTANT_LONG_MAX_VALUE.plusMillis(1); + assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(afterMaxInstant)); + + assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MIN_VALUE)); + + assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MAX_VALUE)); + + assertThrows(NullPointerException.class, () -> DateUtils.clampToMillisRange(null)); + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java index 98bcaa3a1a46b..9032e2cdaed16 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java @@ -156,7 +156,6 @@ public void testIgnoreMalformedLegacy() throws IOException { "failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]" ); testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648"); - testIgnoreMalformedForValue("-522000000", "long overflow"); } public void testIgnoreMalformed() throws IOException { @@ -170,7 +169,6 @@ public void testIgnoreMalformed() throws IOException { "failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]" ); testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648"); - testIgnoreMalformedForValue("-522000000", "long overflow"); } private void testIgnoreMalformedForValue(String value, String expectedCause) throws IOException { diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java index 15b16f4610062..52091d571ee72 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java @@ -31,20 +31,32 @@ package org.opensearch.index.mapper; +import org.apache.lucene.document.Field; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; @@ -71,8 +83,12 @@ import org.joda.time.DateTimeZone; import java.io.IOException; +import java.time.Instant; import java.time.ZoneOffset; +import java.util.Arrays; import java.util.Collections; +import java.util.List; +import java.util.Locale; import static org.hamcrest.CoreMatchers.is; import static org.apache.lucene.document.LongPoint.pack; @@ -490,4 +506,187 @@ public void testParseSourceValueNanos() throws IOException { MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate); assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null)); } + + public void testDateResolutionForOverflow() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); + + DateFieldType ft = new DateFieldType( + "test_date", + true, + true, + true, + DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"), + Resolution.MILLISECONDS, + null, + Collections.emptyMap() + ); + + List dates = Arrays.asList( + null, + "2020-01-01T00:00:00Z", + null, + "2021-01-01T00:00:00Z", + "+292278994-08-17T07:12:55.807Z", + null, + "-292275055-05-16T16:47:04.192Z" + ); + + int numNullDates = 0; + long minDateValue = Long.MAX_VALUE; + long maxDateValue = Long.MIN_VALUE; + + for (int i = 0; i < dates.size(); i++) { + ParseContext.Document doc = new ParseContext.Document(); + String dateStr = dates.get(i); + + if (dateStr != null) { + long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant()); + doc.add(new LongPoint(ft.name(), timestamp)); + doc.add(new SortedNumericDocValuesField(ft.name(), timestamp)); + doc.add(new StoredField(ft.name(), timestamp)); + doc.add(new StoredField("id", i)); + minDateValue = Math.min(minDateValue, timestamp); + maxDateValue = Math.max(maxDateValue, timestamp); + } else { + numNullDates++; + doc.add(new StoredField("id", i)); + } + w.addDocument(doc); + } + + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = new IndexSearcher(reader); + + Settings indexSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .build(); + QueryShardContext context = new QueryShardContext( + 0, + new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings), + BigArrays.NON_RECYCLING_INSTANCE, + null, + null, + null, + null, + null, + xContentRegistry(), + writableRegistry(), + null, + null, + () -> nowInMillis, + null, + null, + () -> true, + null + ); + + Query rangeQuery = ft.rangeQuery( + "-292275055-05-16T16:47:04.192Z", + "+292278994-08-17T07:12:55.807Z", + true, + true, + null, + null, + null, + context + ); + + TopDocs topDocs = searcher.search(rangeQuery, dates.size()); + assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value); + + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc); + IndexableField dateField = doc.getField(ft.name()); + if (dateField != null) { + long dateValue = dateField.numericValue().longValue(); + assertTrue( + "Date value " + dateValue + " should be within valid range", + dateValue >= minDateValue && dateValue <= maxDateValue + ); + } + } + + DateFieldType ftWithNullValue = new DateFieldType( + "test_date", + true, + true, + true, + DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"), + Resolution.MILLISECONDS, + "2020-01-01T00:00:00Z", + Collections.emptyMap() + ); + + Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context); + topDocs = searcher.search(nullValueQuery, dates.size()); + assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value); + + IOUtils.close(reader, w, dir); + } + + public void testDateFieldTypeWithNulls() throws IOException { + DateFieldType ft = new DateFieldType( + "domainAttributes.dueDate", + true, + true, + true, + DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"), + Resolution.MILLISECONDS, + null, + Collections.emptyMap() + ); + + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null)); + + int nullDocs = 3500; + int datedDocs = 50; + + for (int i = 0; i < nullDocs; i++) { + ParseContext.Document doc = new ParseContext.Document(); + doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES)); + w.addDocument(doc); + } + + for (int i = 1; i <= datedDocs; i++) { + ParseContext.Document doc = new ParseContext.Document(); + String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1); + long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant()); + doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES)); + doc.add(new LongPoint(ft.name(), timestamp)); + doc.add(new SortedNumericDocValuesField(ft.name(), timestamp)); + doc.add(new StoredField(ft.name(), timestamp)); + w.addDocument(doc); + } + + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = new IndexSearcher(reader); + + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST); + + Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false)); + + for (int i = 0; i < 100; i++) { + TopDocs topDocs = searcher.search(queryBuilder.build(), nullDocs + datedDocs, sort); + assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc); + IndexableField dateField = doc.getField(ft.name()); + if (dateField != null) { + long dateValue = dateField.numericValue().longValue(); + Instant dateInstant = Instant.ofEpochMilli(dateValue); + assertTrue( + "Date should be in March 2022", + dateInstant.isAfter(Instant.parse("2022-03-01T00:00:00Z")) + && dateInstant.isBefore(Instant.parse("2022-04-01T00:00:00Z")) + ); + } + } + } + IOUtils.close(reader, w, dir); + } } From da6eda776a0c33f75da3645b04218c35d44d3aa7 Mon Sep 17 00:00:00 2001 From: Pranshu Shukla <55992439+Pranshu-S@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:35:56 +0530 Subject: [PATCH 2/7] Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state (#16763) * Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state Signed-off-by: Pranshu Shukla --- CHANGELOG.md | 1 + .../discovery/DiscoveryDisruptionIT.java | 152 ++++++++++++++++++ .../remotestore/RemoteStoreNodeService.java | 15 ++ .../repositories/RepositoriesService.java | 7 + .../coordination/JoinTaskExecutorTests.java | 67 ++++++++ .../opensearch/test/InternalTestCluster.java | 20 ++- .../test/OpenSearchIntegTestCase.java | 39 +++++ 7 files changed, 300 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bab36a15d958..2aeb915ed6143 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Tiered Caching] Fix bug in cache stats API ([#16560](https://github.com/opensearch-project/OpenSearch/pull/16560)) - Bound the size of cache in deprecation logger ([16702](https://github.com/opensearch-project/OpenSearch/issues/16702)) - Ensure consistency of system flag on IndexMetadata after diff is applied ([#16644](https://github.com/opensearch-project/OpenSearch/pull/16644)) +- Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state ([#16763](https://github.com/opensearch-project/OpenSearch/pull/16763)) ### Security diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java index 70124c8c46700..377f99cd8b791 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/DiscoveryDisruptionIT.java @@ -33,12 +33,21 @@ package org.opensearch.discovery; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.FailedToCommitClusterStateException; import org.opensearch.cluster.coordination.JoinHelper; +import org.opensearch.cluster.coordination.PersistedStateRegistry; import org.opensearch.cluster.coordination.PublicationTransportHandler; +import org.opensearch.cluster.metadata.RepositoriesMetadata; +import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Randomness; import org.opensearch.common.settings.Settings; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryMissingException; +import org.opensearch.repositories.fs.ReloadableFsRepository; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.disruption.NetworkDisruption; import org.opensearch.test.disruption.ServiceDisruptionScheme; @@ -46,10 +55,15 @@ import org.opensearch.test.transport.MockTransportService; import org.opensearch.transport.Transport; import org.opensearch.transport.TransportService; +import org.junit.Assert; +import java.util.Arrays; import java.util.HashSet; +import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING; @@ -250,4 +264,142 @@ public void testNodeNotReachableFromClusterManager() throws Exception { ensureStableCluster(3); } + /** + * Tests the scenario where-in a cluster-state containing new repository meta-data as part of a node-join from a + * repository-configured node fails on a commit stag and has a master switch. This would lead to master nodes + * doing another round of node-joins with the new cluster-state as the previous attempt had a successful publish. + */ + public void testElectClusterManagerRemotePublicationConfigurationNodeJoinCommitFails() throws Exception { + final String remoteStateRepoName = "remote-state-repo"; + final String remoteRoutingTableRepoName = "routing-table-repo"; + + Settings remotePublicationSettings = buildRemotePublicationNodeAttributes( + remoteStateRepoName, + ReloadableFsRepository.TYPE, + remoteRoutingTableRepoName, + ReloadableFsRepository.TYPE + ); + internalCluster().startClusterManagerOnlyNodes(3); + internalCluster().startDataOnlyNodes(3); + + String clusterManagerNode = internalCluster().getClusterManagerName(); + List nonClusterManagerNodes = Arrays.stream(internalCluster().getNodeNames()) + .filter(node -> !node.equals(clusterManagerNode)) + .collect(Collectors.toList()); + + ensureStableCluster(6); + + MockTransportService clusterManagerTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + clusterManagerNode + ); + logger.info("Blocking Cluster Manager Commit Request on all nodes"); + // This is to allow the new node to have commit failures on the nodes in the send path itself. This will lead to the + // nodes have a successful publish operation but failed commit operation. This will come into play once the new node joins + nonClusterManagerNodes.forEach(node -> { + TransportService targetTransportService = internalCluster().getInstance(TransportService.class, node); + clusterManagerTransportService.addSendBehavior(targetTransportService, (connection, requestId, action, request, options) -> { + if (action.equals(PublicationTransportHandler.COMMIT_STATE_ACTION_NAME)) { + logger.info("--> preventing {} request", PublicationTransportHandler.COMMIT_STATE_ACTION_NAME); + throw new FailedToCommitClusterStateException("Blocking Commit"); + } + connection.sendRequest(requestId, action, request, options); + }); + }); + + logger.info("Starting Node with remote publication settings"); + // Start a node with remote-publication repositories configured. This will lead to the active cluster-manager create + // a new cluster-state event with the new node-join along with new repositories setup in the cluster meta-data. + internalCluster().startDataOnlyNodes(1, remotePublicationSettings, Boolean.TRUE); + + // Checking if publish succeeded in the nodes before shutting down the blocked cluster-manager + assertBusy(() -> { + String randomNode = nonClusterManagerNodes.get(Randomness.get().nextInt(nonClusterManagerNodes.size())); + PersistedStateRegistry registry = internalCluster().getInstance(PersistedStateRegistry.class, randomNode); + + ClusterState state = registry.getPersistedState(PersistedStateRegistry.PersistedStateType.LOCAL).getLastAcceptedState(); + RepositoriesMetadata repositoriesMetadata = state.metadata().custom(RepositoriesMetadata.TYPE); + Boolean isRemoteStateRepoConfigured = Boolean.FALSE; + Boolean isRemoteRoutingTableRepoConfigured = Boolean.FALSE; + + assertNotNull(repositoriesMetadata); + assertNotNull(repositoriesMetadata.repositories()); + + for (RepositoryMetadata repo : repositoriesMetadata.repositories()) { + if (repo.name().equals(remoteStateRepoName)) { + isRemoteStateRepoConfigured = Boolean.TRUE; + } else if (repo.name().equals(remoteRoutingTableRepoName)) { + isRemoteRoutingTableRepoConfigured = Boolean.TRUE; + } + } + // Asserting that the metadata is present in the persisted cluster-state + assertTrue(isRemoteStateRepoConfigured); + assertTrue(isRemoteRoutingTableRepoConfigured); + + RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class, randomNode); + + isRemoteStateRepoConfigured = isRepoPresentInRepositoryService(repositoriesService, remoteStateRepoName); + isRemoteRoutingTableRepoConfigured = isRepoPresentInRepositoryService(repositoriesService, remoteRoutingTableRepoName); + + // Asserting that the metadata is not present in the repository service. + Assert.assertFalse(isRemoteStateRepoConfigured); + Assert.assertFalse(isRemoteRoutingTableRepoConfigured); + }); + + logger.info("Stopping current Cluster Manager"); + // We stop the current cluster-manager whose outbound paths were blocked. This is to force a new election onto nodes + // we had the new cluster-state published but not commited. + internalCluster().stopCurrentClusterManagerNode(); + + // We expect that the repositories validations are skipped in this case and node-joins succeeds as expected. The + // repositories validations are skipped because even though the cluster-state is updated in the persisted registry, + // the repository service will not be updated as the commit attempt failed. + ensureStableCluster(6); + + String randomNode = nonClusterManagerNodes.get(Randomness.get().nextInt(nonClusterManagerNodes.size())); + + // Checking if the final cluster-state is updated. + RepositoriesMetadata repositoriesMetadata = internalCluster().getInstance(ClusterService.class, randomNode) + .state() + .metadata() + .custom(RepositoriesMetadata.TYPE); + + Boolean isRemoteStateRepoConfigured = Boolean.FALSE; + Boolean isRemoteRoutingTableRepoConfigured = Boolean.FALSE; + + for (RepositoryMetadata repo : repositoriesMetadata.repositories()) { + if (repo.name().equals(remoteStateRepoName)) { + isRemoteStateRepoConfigured = Boolean.TRUE; + } else if (repo.name().equals(remoteRoutingTableRepoName)) { + isRemoteRoutingTableRepoConfigured = Boolean.TRUE; + } + } + + Assert.assertTrue("RemoteState Repo is not set in RepositoriesMetadata", isRemoteStateRepoConfigured); + Assert.assertTrue("RemoteRoutingTable Repo is not set in RepositoriesMetadata", isRemoteRoutingTableRepoConfigured); + + RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class, randomNode); + + isRemoteStateRepoConfigured = isRepoPresentInRepositoryService(repositoriesService, remoteStateRepoName); + isRemoteRoutingTableRepoConfigured = isRepoPresentInRepositoryService(repositoriesService, remoteRoutingTableRepoName); + + Assert.assertTrue("RemoteState Repo is not set in RepositoryService", isRemoteStateRepoConfigured); + Assert.assertTrue("RemoteRoutingTable Repo is not set in RepositoryService", isRemoteRoutingTableRepoConfigured); + + logger.info("Stopping current Cluster Manager"); + } + + private Boolean isRepoPresentInRepositoryService(RepositoriesService repositoriesService, String repoName) { + try { + Repository remoteStateRepo = repositoriesService.repository(repoName); + if (Objects.nonNull(remoteStateRepo)) { + return Boolean.TRUE; + } + } catch (RepositoryMissingException e) { + return Boolean.FALSE; + } + + return Boolean.FALSE; + } + } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index c1c041ce01198..fb97cf40d90d6 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -21,6 +21,7 @@ import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.repositories.RepositoryException; +import org.opensearch.repositories.RepositoryMissingException; import org.opensearch.threadpool.ThreadPool; import java.util.ArrayList; @@ -183,6 +184,20 @@ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode boolean repositoryAlreadyPresent = false; for (RepositoryMetadata existingRepositoryMetadata : existingRepositories.repositories()) { if (newRepositoryMetadata.name().equals(existingRepositoryMetadata.name())) { + try { + // This is to handle cases where-in the during a previous node-join attempt if the publish operation succeeded + // but the commit operation failed, the cluster-state may have the repository metadata which is not applied + // into the repository service. This may lead to assertion failures down the line. + repositoriesService.get().repository(newRepositoryMetadata.name()); + } catch (RepositoryMissingException e) { + logger.warn( + "Skipping repositories metadata checks: Remote repository [{}] is in the cluster state but not present " + + "in the repository service.", + newRepositoryMetadata.name() + ); + break; + } + try { // This will help in handling two scenarios - // 1. When a fresh cluster is formed and a node tries to join the cluster, the repository diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java index 9aec81536dbd0..49065be0abb25 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java @@ -80,6 +80,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -904,6 +905,12 @@ public void ensureValidSystemRepositoryUpdate(RepositoryMetadata newRepositoryMe Settings newRepositoryMetadataSettings = newRepositoryMetadata.settings(); Settings currentRepositoryMetadataSettings = currentRepositoryMetadata.settings(); + assert Objects.nonNull(repository) : String.format( + Locale.ROOT, + "repository [%s] not present in RepositoryService", + currentRepositoryMetadata.name() + ); + List restrictedSettings = repository.getRestrictedSystemRepositorySettings() .stream() .map(setting -> setting.getKey()) diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index f6fb203bfe1a9..9590e5615d451 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -55,6 +55,7 @@ import org.opensearch.common.util.FeatureFlags; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.RepositoryMissingException; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; @@ -1378,6 +1379,72 @@ public void testJoinRemoteStoreClusterWithRemotePublicationNodeInMixedMode() { JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testUpdatesClusterStateWithRepositoryMetadataNotInSync() throws Exception { + Map newNodeAttributes = new HashMap<>(); + newNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + newNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.adaptAutoExpandReplicas(any())).then(invocationOnMock -> invocationOnMock.getArguments()[0]); + final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null); + RepositoriesService repositoriesService = mock(RepositoriesService.class); + when(repositoriesService.repository(any())).thenThrow(RepositoryMissingException.class); + final RemoteStoreNodeService remoteStoreNodeService = new RemoteStoreNodeService(new SetOnce<>(repositoriesService)::get, null); + + final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor( + Settings.EMPTY, + allocationService, + logger, + rerouteService, + remoteStoreNodeService + ); + + final DiscoveryNode clusterManagerNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + newNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final RepositoryMetadata clusterStateRepo = buildRepositoryMetadata(clusterManagerNode, CLUSTER_STATE_REPO); + final RepositoryMetadata routingTableRepo = buildRepositoryMetadata(clusterManagerNode, ROUTING_TABLE_REPO); + List repositoriesMetadata = new ArrayList<>() { + { + add(clusterStateRepo); + add(routingTableRepo); + } + }; + + final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + .add(clusterManagerNode) + .localNodeId(clusterManagerNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + ) + .metadata(Metadata.builder().putCustom(RepositoriesMetadata.TYPE, new RepositoriesMetadata(repositoriesMetadata))) + .build(); + + final DiscoveryNode joiningNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + newNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final ClusterStateTaskExecutor.ClusterTasksResult result = joinTaskExecutor.execute( + clusterState, + List.of(new JoinTaskExecutor.Task(joiningNode, "test")) + ); + assertThat(result.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next(); + assertTrue(taskResult.isSuccess()); + validatePublicationRepositoryMetadata(result.resultingState, clusterManagerNode); + + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index fa5fb736f518f..7b2c653e9bdb2 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2322,10 +2322,24 @@ public List startNodes(int numOfNodes, Settings settings) { return startNodes(Collections.nCopies(numOfNodes, settings).toArray(new Settings[0])); } + /** + * Starts multiple nodes with the given settings and returns their names + */ + public List startNodes(int numOfNodes, Settings settings, Boolean waitForNodeJoin) { + return startNodes(waitForNodeJoin, Collections.nCopies(numOfNodes, settings).toArray(new Settings[0])); + } + /** * Starts multiple nodes with the given settings and returns their names */ public synchronized List startNodes(Settings... extraSettings) { + return startNodes(false, extraSettings); + } + + /** + * Starts multiple nodes with the given settings and returns their names + */ + public synchronized List startNodes(Boolean waitForNodeJoin, Settings... extraSettings) { final int newClusterManagerCount = Math.toIntExact(Stream.of(extraSettings).filter(DiscoveryNode::isClusterManagerNode).count()); final int defaultMinClusterManagerNodes; if (autoManageClusterManagerNodes) { @@ -2377,7 +2391,7 @@ public synchronized List startNodes(Settings... extraSettings) { nodes.add(nodeAndClient); } startAndPublishNodesAndClients(nodes); - if (autoManageClusterManagerNodes) { + if (autoManageClusterManagerNodes && !waitForNodeJoin) { validateClusterFormed(); } return nodes.stream().map(NodeAndClient::getName).collect(Collectors.toList()); @@ -2422,6 +2436,10 @@ public List startDataOnlyNodes(int numNodes, Settings settings) { return startNodes(numNodes, Settings.builder().put(onlyRole(settings, DiscoveryNodeRole.DATA_ROLE)).build()); } + public List startDataOnlyNodes(int numNodes, Settings settings, Boolean ignoreNodeJoin) { + return startNodes(numNodes, Settings.builder().put(onlyRole(settings, DiscoveryNodeRole.DATA_ROLE)).build(), ignoreNodeJoin); + } + public List startSearchOnlyNodes(int numNodes) { return startSearchOnlyNodes(numNodes, Settings.EMPTY); } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 1ee856d3092f0..1c26ea4ca2c91 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -214,6 +214,8 @@ import java.util.function.Function; import java.util.stream.Collectors; +import reactor.util.annotation.NonNull; + import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.common.unit.TimeValue.timeValueMillis; @@ -2915,6 +2917,43 @@ protected static Settings buildRemoteStoreNodeAttributes( return settings.build(); } + protected Settings buildRemotePublicationNodeAttributes( + @NonNull String remoteStateRepoName, + @NonNull String remoteStateRepoType, + @NonNull String routingTableRepoName, + @NonNull String routingTableRepoType + ) { + String remoteStateRepositoryTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + remoteStateRepoName + ); + String routingTableRepositoryTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + routingTableRepoName + ); + String remoteStateRepositorySettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + remoteStateRepoName + ); + String routingTableRepositorySettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + routingTableRepoName + ); + + return Settings.builder() + .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, remoteStateRepoName) + .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, routingTableRepoName) + .put(remoteStateRepositoryTypeAttributeKey, remoteStateRepoType) + .put(routingTableRepositoryTypeAttributeKey, routingTableRepoType) + .put(remoteStateRepositorySettingsAttributeKeyPrefix + "location", randomRepoPath().toAbsolutePath()) + .put(routingTableRepositorySettingsAttributeKeyPrefix + "location", randomRepoPath().toAbsolutePath()) + .build(); + } + public static String resolvePath(IndexId indexId, String shardId) { PathType pathType = PathType.fromCode(indexId.getShardPathType()); RemoteStorePathStrategy.SnapshotShardPathInput shardPathInput = new RemoteStorePathStrategy.SnapshotShardPathInput.Builder() From 336bb5fc7195b8d3990698788c8600bc54330283 Mon Sep 17 00:00:00 2001 From: Brandon Shien <44730413+bshien@users.noreply.github.com> Date: Tue, 10 Dec 2024 20:29:32 -0800 Subject: [PATCH 3/7] Added release notes for 1.3.20 (#16824) Signed-off-by: Brandon Shien --- release-notes/opensearch.release-notes-1.3.20.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 release-notes/opensearch.release-notes-1.3.20.md diff --git a/release-notes/opensearch.release-notes-1.3.20.md b/release-notes/opensearch.release-notes-1.3.20.md new file mode 100644 index 0000000000000..44cd62e31a928 --- /dev/null +++ b/release-notes/opensearch.release-notes-1.3.20.md @@ -0,0 +1,14 @@ +## 2024-12-10 Version 1.3.20 Release Notes + +### Dependencies +- Bump `icu4j` from 62.1 to 62.2 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) +- Bump `org.bouncycastle:bc-fips` from 1.0.2.4 to 1.0.2.5 ([#13446](https://github.com/opensearch-project/OpenSearch/pull/13446)) +- Bump `Netty` from 4.1.112.Final to 4.1.115.Final ([#16661](https://github.com/opensearch-project/OpenSearch/pull/16661)) +- Bump `avro` from 1.11.3 to 1.11.4 ([#16773](https://github.com/opensearch-project/OpenSearch/pull/16773)) +- Bump `commonsio` to 2.16.0 ([#16780](https://github.com/opensearch-project/OpenSearch/pull/16780)) +- Bump `protobuf-java` to 3.25.5 ([#16792](https://github.com/opensearch-project/OpenSearch/pull/16792)) +- Bump `snappy-java` to 1.1.10.7 ([#16792](https://github.com/opensearch-project/OpenSearch/pull/16792)) + +### Fixed +- Update help output for _cat ([#14722](https://github.com/opensearch-project/OpenSearch/pull/14722)) +- Bugfix to guard against stack overflow errors caused by very large reg-ex input ([#16101](https://github.com/opensearch-project/OpenSearch/pull/16101)) From c5f381898ec3e1e505b5b52d43462ebcd7f27bb6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:56:02 +0800 Subject: [PATCH 4/7] Bump com.nimbusds:nimbus-jose-jwt from 9.46 to 9.47 in /test/fixtures/hdfs-fixture (#16807) * Bump com.nimbusds:nimbus-jose-jwt in /test/fixtures/hdfs-fixture Bumps [com.nimbusds:nimbus-jose-jwt](https://bitbucket.org/connect2id/nimbus-jose-jwt) from 9.46 to 9.47. - [Changelog](https://bitbucket.org/connect2id/nimbus-jose-jwt/src/master/CHANGELOG.txt) - [Commits](https://bitbucket.org/connect2id/nimbus-jose-jwt/branches/compare/9.47..9.46) --- updated-dependencies: - dependency-name: com.nimbusds:nimbus-jose-jwt dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 2 +- test/fixtures/hdfs-fixture/build.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aeb915ed6143..5029909a25fcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.azure:azure-storage-blob` from 12.23.0 to 12.28.1 ([#16501](https://github.com/opensearch-project/OpenSearch/pull/16501)) - Bump `org.apache.hadoop:hadoop-minicluster` from 3.4.0 to 3.4.1 ([#16550](https://github.com/opensearch-project/OpenSearch/pull/16550)) - Bump `org.apache.xmlbeans:xmlbeans` from 5.2.1 to 5.2.2 ([#16612](https://github.com/opensearch-project/OpenSearch/pull/16612)) -- Bump `com.nimbusds:nimbus-jose-jwt` from 9.41.1 to 9.46 ([#16611](https://github.com/opensearch-project/OpenSearch/pull/16611)) +- Bump `com.nimbusds:nimbus-jose-jwt` from 9.41.1 to 9.47 ([#16611](https://github.com/opensearch-project/OpenSearch/pull/16611), [#16807](https://github.com/opensearch-project/OpenSearch/pull/16807)) - Bump `lycheeverse/lychee-action` from 2.0.2 to 2.1.0 ([#16610](https://github.com/opensearch-project/OpenSearch/pull/16610)) - Bump `me.champeau.gradle.japicmp` from 0.4.4 to 0.4.5 ([#16614](https://github.com/opensearch-project/OpenSearch/pull/16614)) - Bump `mockito` from 5.14.1 to 5.14.2, `objenesis` from 3.2 to 3.3 and `bytebuddy` from 1.15.4 to 1.15.10 ([#16655](https://github.com/opensearch-project/OpenSearch/pull/16655)) diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index f531a3c6ade5a..4dd1a2787ee87 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -79,7 +79,7 @@ dependencies { api "org.jboss.xnio:xnio-nio:3.8.16.Final" api 'org.jline:jline:3.27.1' api 'org.apache.commons:commons-configuration2:2.11.0' - api 'com.nimbusds:nimbus-jose-jwt:9.46' + api 'com.nimbusds:nimbus-jose-jwt:9.47' api ('org.apache.kerby:kerb-admin:2.1.0') { exclude group: "org.jboss.xnio" exclude group: "org.jline" From 5aa65096ff3ca3aec8eb563a8ac52c5e42bf5009 Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Wed, 11 Dec 2024 06:02:21 -0800 Subject: [PATCH 5/7] Update opensearch.release-notes-1.3.20.md (#16825) Signed-off-by: Daniel Widdis --- release-notes/opensearch.release-notes-1.3.20.md | 1 + 1 file changed, 1 insertion(+) diff --git a/release-notes/opensearch.release-notes-1.3.20.md b/release-notes/opensearch.release-notes-1.3.20.md index 44cd62e31a928..b3cc89fb37985 100644 --- a/release-notes/opensearch.release-notes-1.3.20.md +++ b/release-notes/opensearch.release-notes-1.3.20.md @@ -8,6 +8,7 @@ - Bump `commonsio` to 2.16.0 ([#16780](https://github.com/opensearch-project/OpenSearch/pull/16780)) - Bump `protobuf-java` to 3.25.5 ([#16792](https://github.com/opensearch-project/OpenSearch/pull/16792)) - Bump `snappy-java` to 1.1.10.7 ([#16792](https://github.com/opensearch-project/OpenSearch/pull/16792)) +- Bump `mime4j-core` to 0.8.11 ([#16810](https://github.com/opensearch-project/OpenSearch/pull/16810)) ### Fixed - Update help output for _cat ([#14722](https://github.com/opensearch-project/OpenSearch/pull/14722)) From 2b402eccccbce497a37959ee89a200a4dc3318c6 Mon Sep 17 00:00:00 2001 From: gargharsh3134 <51459091+gargharsh3134@users.noreply.github.com> Date: Thu, 12 Dec 2024 08:44:30 +0530 Subject: [PATCH 6/7] Fixing _list/shards API for closed indices (#16606) * Fixing _list/shards API for closed indices Signed-off-by: Harsh Garg --- CHANGELOG.md | 1 + .../shards/TransportCatShardsActionIT.java | 342 +++++++++++++++++- .../shards/TransportCatShardsAction.java | 28 +- 3 files changed, 364 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5029909a25fcf..e4b56db662881 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bound the size of cache in deprecation logger ([16702](https://github.com/opensearch-project/OpenSearch/issues/16702)) - Ensure consistency of system flag on IndexMetadata after diff is applied ([#16644](https://github.com/opensearch-project/OpenSearch/pull/16644)) - Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state ([#16763](https://github.com/opensearch-project/OpenSearch/pull/16763)) +- Fix _list/shards API failing when closed indices are present ([#16606](https://github.com/opensearch-project/OpenSearch/pull/16606)) ### Security diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java index 32d5b3db85629..a7cb4847b45e5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java @@ -8,9 +8,15 @@ package org.opensearch.action.admin.cluster.shards; +import org.opensearch.action.admin.indices.alias.IndicesAliasesRequest; +import org.opensearch.action.admin.indices.datastream.DataStreamTestCase; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.action.pagination.PageParams; +import org.opensearch.client.Requests; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; @@ -20,15 +26,19 @@ import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING; import static org.opensearch.common.unit.TimeValue.timeValueMillis; import static org.opensearch.search.SearchService.NO_TIMEOUT; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(numDataNodes = 0, scope = OpenSearchIntegTestCase.Scope.TEST) -public class TransportCatShardsActionIT extends OpenSearchIntegTestCase { +public class TransportCatShardsActionIT extends DataStreamTestCase { public void testCatShardsWithSuccessResponse() throws InterruptedException { internalCluster().startClusterManagerOnlyNodes(1); @@ -125,4 +135,334 @@ public void onFailure(Exception e) { latch.await(); } + public void testListShardsWithHiddenIndex() throws Exception { + final int numShards = 1; + final int numReplicas = 1; + internalCluster().startClusterManagerOnlyNodes(1); + internalCluster().startDataOnlyNodes(2); + createIndex( + "test-hidden-idx", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .put(IndexMetadata.SETTING_INDEX_HIDDEN, true) + .build() + ); + ensureGreen(); + + // Verify result for a default query: "_list/shards" + CatShardsRequest listShardsRequest = getListShardsTransportRequest(Strings.EMPTY_ARRAY, 100); + ActionFuture listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), "test-hidden-idx", 2, true); + + // Verify result when hidden index is explicitly queried: "_list/shards" + listShardsRequest = getListShardsTransportRequest(new String[] { "test-hidden-idx" }, 100); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), "test-hidden-idx", 2, true); + + // Verify result when hidden index is queried with wildcard: "_list/shards*" + // Since the ClusterStateAction underneath is invoked with lenientExpandOpen IndicesOptions, + // Wildcards for hidden indices should not get resolved. + listShardsRequest = getListShardsTransportRequest(new String[] { "test-hidden-idx*" }, 100); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertEquals(0, listShardsResponse.get().getResponseShards().size()); + assertSingleIndexResponseShards(listShardsResponse.get(), "test-hidden-idx", 0, false); + } + + public void testListShardsWithClosedIndex() throws Exception { + final int numShards = 1; + final int numReplicas = 1; + internalCluster().startClusterManagerOnlyNodes(1); + internalCluster().startDataOnlyNodes(2); + createIndex( + "test-closed-idx", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + ensureGreen(); + + // close index "test-closed-idx" + client().admin().indices().close(Requests.closeIndexRequest("test-closed-idx")).get(); + ensureGreen(); + + // Verify result for a default query: "_list/shards" + CatShardsRequest listShardsRequest = getListShardsTransportRequest(Strings.EMPTY_ARRAY, 100); + ActionFuture listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), "test-closed-idx", 2, false); + + // Verify result when closed index is explicitly queried: "_list/shards" + listShardsRequest = getListShardsTransportRequest(new String[] { "test-closed-idx" }, 100); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), "test-closed-idx", 2, false); + + // Verify result when closed index is queried with wildcard: "_list/shards*" + // Since the ClusterStateAction underneath is invoked with lenientExpandOpen IndicesOptions, + // Wildcards for closed indices should not get resolved. + listShardsRequest = getListShardsTransportRequest(new String[] { "test-closed-idx*" }, 100); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), "test-closed-idx", 0, false); + } + + public void testListShardsWithClosedAndHiddenIndices() throws InterruptedException, ExecutionException { + final int numIndices = 4; + final int numShards = 1; + final int numReplicas = 2; + final int pageSize = 100; + internalCluster().startClusterManagerOnlyNodes(1); + internalCluster().startDataOnlyNodes(3); + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-2", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-closed-idx", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-hidden-idx", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .put(IndexMetadata.SETTING_INDEX_HIDDEN, true) + .build() + ); + // close index "test-closed-idx" + client().admin().indices().close(Requests.closeIndexRequest("test-closed-idx")).get(); + ensureGreen(); + + // Verifying response for default queries: /_list/shards + // all the shards should be part of response, however stats should not be displayed for closed index + CatShardsRequest listShardsRequest = getListShardsTransportRequest(Strings.EMPTY_ARRAY, pageSize); + ActionFuture listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertTrue(listShardsResponse.get().getResponseShards().stream().anyMatch(shard -> shard.getIndexName().equals("test-closed-idx"))); + assertTrue(listShardsResponse.get().getResponseShards().stream().anyMatch(shard -> shard.getIndexName().equals("test-hidden-idx"))); + assertEquals(numIndices * numShards * (numReplicas + 1), listShardsResponse.get().getResponseShards().size()); + assertFalse( + Arrays.stream(listShardsResponse.get().getIndicesStatsResponse().getShards()) + .anyMatch(shardStats -> shardStats.getShardRouting().getIndexName().equals("test-closed-idx")) + ); + assertEquals( + (numIndices - 1) * numShards * (numReplicas + 1), + listShardsResponse.get().getIndicesStatsResponse().getShards().length + ); + + // Verifying responses when hidden indices are explicitly queried: /_list/shards/test-hidden-idx + // Shards for hidden index should appear in response along with stats + listShardsRequest.setIndices(List.of("test-hidden-idx").toArray(new String[0])); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertTrue(listShardsResponse.get().getResponseShards().stream().allMatch(shard -> shard.getIndexName().equals("test-hidden-idx"))); + assertTrue( + Arrays.stream(listShardsResponse.get().getIndicesStatsResponse().getShards()) + .allMatch(shardStats -> shardStats.getShardRouting().getIndexName().equals("test-hidden-idx")) + ); + assertEquals( + listShardsResponse.get().getResponseShards().size(), + listShardsResponse.get().getIndicesStatsResponse().getShards().length + ); + + // Verifying responses when hidden indices are queried with wildcards: /_list/shards/test-hidden-idx* + // Shards for hidden index should not appear in response with stats. + listShardsRequest.setIndices(List.of("test-hidden-idx*").toArray(new String[0])); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertEquals(0, listShardsResponse.get().getResponseShards().size()); + assertEquals(0, listShardsResponse.get().getIndicesStatsResponse().getShards().length); + + // Explicitly querying for closed index: /_list/shards/test-closed-idx + // should output closed shards without stats. + listShardsRequest.setIndices(List.of("test-closed-idx").toArray(new String[0])); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertTrue(listShardsResponse.get().getResponseShards().stream().anyMatch(shard -> shard.getIndexName().equals("test-closed-idx"))); + assertEquals(0, listShardsResponse.get().getIndicesStatsResponse().getShards().length); + + // Querying for closed index with wildcards: /_list/shards/test-closed-idx* + // should not output any closed shards. + listShardsRequest.setIndices(List.of("test-closed-idx*").toArray(new String[0])); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertEquals(0, listShardsResponse.get().getResponseShards().size()); + assertEquals(0, listShardsResponse.get().getIndicesStatsResponse().getShards().length); + } + + public void testListShardsWithClosedIndicesAcrossPages() throws InterruptedException, ExecutionException { + final int numIndices = 4; + final int numShards = 1; + final int numReplicas = 2; + final int pageSize = numShards * (numReplicas + 1); + internalCluster().startClusterManagerOnlyNodes(1); + internalCluster().startDataOnlyNodes(3); + createIndex( + "test-open-idx-1", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-closed-idx-1", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-open-idx-2", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-closed-idx-2", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .put(IndexMetadata.SETTING_INDEX_HIDDEN, true) + .build() + ); + // close index "test-closed-idx-1" + client().admin().indices().close(Requests.closeIndexRequest("test-closed-idx-1")).get(); + ensureGreen(); + // close index "test-closed-idx-2" + client().admin().indices().close(Requests.closeIndexRequest("test-closed-idx-2")).get(); + ensureGreen(); + + // Verifying response for default queries: /_list/shards + List responseShardRouting = new ArrayList<>(); + List responseShardStats = new ArrayList<>(); + String nextToken = null; + CatShardsRequest listShardsRequest; + ActionFuture listShardsResponse; + do { + listShardsRequest = getListShardsTransportRequest(Strings.EMPTY_ARRAY, nextToken, pageSize); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + nextToken = listShardsResponse.get().getPageToken().getNextToken(); + responseShardRouting.addAll(listShardsResponse.get().getResponseShards()); + responseShardStats.addAll(List.of(listShardsResponse.get().getIndicesStatsResponse().getShards())); + } while (nextToken != null); + + assertTrue(responseShardRouting.stream().anyMatch(shard -> shard.getIndexName().equals("test-closed-idx-1"))); + assertTrue(responseShardRouting.stream().anyMatch(shard -> shard.getIndexName().equals("test-closed-idx-2"))); + assertEquals(numIndices * numShards * (numReplicas + 1), responseShardRouting.size()); + // ShardsStats should only appear for 2 open indices + assertFalse( + responseShardStats.stream().anyMatch(shardStats -> shardStats.getShardRouting().getIndexName().contains("test-closed-idx")) + ); + assertEquals(2 * numShards * (numReplicas + 1), responseShardStats.size()); + } + + public void testListShardsWithDataStream() throws Exception { + final int numDataNodes = 3; + String dataStreamName = "logs-test"; + internalCluster().startClusterManagerOnlyNodes(1); + internalCluster().startDataOnlyNodes(numDataNodes); + // Create an index template for data streams. + createDataStreamIndexTemplate("data-stream-template", List.of("logs-*")); + // Create data streams matching the "logs-*" index pattern. + createDataStream(dataStreamName); + ensureGreen(); + // Verifying default query's result. Data stream should have created a hidden backing index in the + // background and all the corresponding shards should appear in the response along with stats. + CatShardsRequest listShardsRequest = getListShardsTransportRequest(Strings.EMPTY_ARRAY, numDataNodes * numDataNodes); + ActionFuture listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), dataStreamName, numDataNodes + 1, true); + // Verifying result when data stream is directly queried. Again, all the shards with stats should appear + listShardsRequest = getListShardsTransportRequest(new String[] { dataStreamName }, numDataNodes * numDataNodes); + listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertSingleIndexResponseShards(listShardsResponse.get(), dataStreamName, numDataNodes + 1, true); + } + + public void testListShardsWithAliases() throws Exception { + final int numShards = 1; + final int numReplicas = 1; + final String aliasName = "test-alias"; + internalCluster().startClusterManagerOnlyNodes(1); + internalCluster().startDataOnlyNodes(3); + createIndex( + "test-closed-idx", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .build() + ); + createIndex( + "test-hidden-idx", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .put(IndexMetadata.SETTING_INDEX_HIDDEN, true) + .build() + ); + ensureGreen(); + + // Point test alias to both the indices (one being hidden while the other is closed) + final IndicesAliasesRequest request = new IndicesAliasesRequest().origin("allowed"); + request.addAliasAction(IndicesAliasesRequest.AliasActions.add().index("test-closed-idx").alias(aliasName)); + assertAcked(client().admin().indices().aliases(request).actionGet()); + + request.addAliasAction(IndicesAliasesRequest.AliasActions.add().index("test-hidden-idx").alias(aliasName)); + assertAcked(client().admin().indices().aliases(request).actionGet()); + + // close index "test-closed-idx" + client().admin().indices().close(Requests.closeIndexRequest("test-closed-idx")).get(); + ensureGreen(); + + // Verifying result when an alias is explicitly queried. + CatShardsRequest listShardsRequest = getListShardsTransportRequest(new String[] { aliasName }, 100); + ActionFuture listShardsResponse = client().execute(CatShardsAction.INSTANCE, listShardsRequest); + assertTrue( + listShardsResponse.get() + .getResponseShards() + .stream() + .allMatch(shard -> shard.getIndexName().equals("test-hidden-idx") || shard.getIndexName().equals("test-closed-idx")) + ); + assertTrue( + Arrays.stream(listShardsResponse.get().getIndicesStatsResponse().getShards()) + .allMatch(shardStats -> shardStats.getShardRouting().getIndexName().equals("test-hidden-idx")) + ); + assertEquals(4, listShardsResponse.get().getResponseShards().size()); + assertEquals(2, listShardsResponse.get().getIndicesStatsResponse().getShards().length); + } + + private void assertSingleIndexResponseShards( + CatShardsResponse catShardsResponse, + String indexNamePattern, + final int totalNumShards, + boolean shardStatsExist + ) { + assertTrue(catShardsResponse.getResponseShards().stream().allMatch(shard -> shard.getIndexName().contains(indexNamePattern))); + assertEquals(totalNumShards, catShardsResponse.getResponseShards().size()); + if (shardStatsExist) { + assertTrue( + Arrays.stream(catShardsResponse.getIndicesStatsResponse().getShards()) + .allMatch(shardStats -> shardStats.getShardRouting().getIndexName().contains(indexNamePattern)) + ); + } + assertEquals(shardStatsExist ? totalNumShards : 0, catShardsResponse.getIndicesStatsResponse().getShards().length); + } + + private CatShardsRequest getListShardsTransportRequest(String[] indices, final int pageSize) { + return getListShardsTransportRequest(indices, null, pageSize); + } + + private CatShardsRequest getListShardsTransportRequest(String[] indices, String nextToken, final int pageSize) { + CatShardsRequest listShardsRequest = new CatShardsRequest(); + listShardsRequest.setCancelAfterTimeInterval(NO_TIMEOUT); + listShardsRequest.setIndices(indices); + listShardsRequest.setPageParams(new PageParams(nextToken, PageParams.PARAM_ASC_SORT_VALUE, pageSize)); + return listShardsRequest; + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java index 7b36b7a10f4f2..01efa96a7369e 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java @@ -18,6 +18,8 @@ import org.opensearch.action.support.HandledTransportAction; import org.opensearch.action.support.TimeoutTaskCancellationUtility; import org.opensearch.client.node.NodeClient; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.breaker.ResponseLimitBreachedException; import org.opensearch.common.breaker.ResponseLimitSettings; import org.opensearch.common.inject.Inject; @@ -27,6 +29,7 @@ import org.opensearch.tasks.Task; import org.opensearch.transport.TransportService; +import java.util.List; import java.util.Objects; import static org.opensearch.common.breaker.ResponseLimitSettings.LimitEntity.SHARDS; @@ -98,9 +101,6 @@ public void onResponse(ClusterStateResponse clusterStateResponse) { shardsRequest.getPageParams(), clusterStateResponse ); - String[] indices = Objects.isNull(paginationStrategy) - ? shardsRequest.getIndices() - : paginationStrategy.getRequestedIndices().toArray(new String[0]); catShardsResponse.setNodes(clusterStateResponse.getState().getNodes()); catShardsResponse.setResponseShards( Objects.isNull(paginationStrategy) @@ -108,8 +108,12 @@ public void onResponse(ClusterStateResponse clusterStateResponse) { : paginationStrategy.getRequestedEntities() ); catShardsResponse.setPageToken(Objects.isNull(paginationStrategy) ? null : paginationStrategy.getResponseToken()); + + String[] indices = Objects.isNull(paginationStrategy) + ? shardsRequest.getIndices() + : filterClosedIndices(clusterStateResponse.getState(), paginationStrategy.getRequestedIndices()); // For paginated queries, if strategy outputs no shards to be returned, avoid fetching IndicesStats. - if (shouldSkipIndicesStatsRequest(paginationStrategy)) { + if (shouldSkipIndicesStatsRequest(paginationStrategy, indices)) { catShardsResponse.setIndicesStatsResponse(IndicesStatsResponse.getEmptyResponse()); cancellableListener.onResponse(catShardsResponse); return; @@ -166,7 +170,19 @@ private void validateRequestLimit( } } - private boolean shouldSkipIndicesStatsRequest(ShardPaginationStrategy paginationStrategy) { - return Objects.nonNull(paginationStrategy) && paginationStrategy.getRequestedEntities().isEmpty(); + private boolean shouldSkipIndicesStatsRequest(ShardPaginationStrategy paginationStrategy, String[] indices) { + return Objects.nonNull(paginationStrategy) && (indices == null || indices.length == 0); + } + + /** + * Will be used by paginated query (_list/shards) to filter out closed indices (only consider OPEN) before fetching + * IndicesStats. Since pagination strategy always passes concrete indices to TransportIndicesStatsAction, + * the default behaviour of StrictExpandOpenAndForbidClosed leads to errors if closed indices are encountered. + */ + private String[] filterClosedIndices(ClusterState clusterState, List strategyIndices) { + return strategyIndices.stream().filter(index -> { + IndexMetadata metadata = clusterState.metadata().indices().get(index); + return metadata != null && metadata.getState().equals(IndexMetadata.State.CLOSE) == false; + }).toArray(String[]::new); } } From b67cdf47b9cc5126c2d994f3c4ad0676678b16bc Mon Sep 17 00:00:00 2001 From: Vinay Krishna Pudyodu Date: Thu, 12 Dec 2024 15:41:44 -0800 Subject: [PATCH 7/7] Added support for search replica to return segrep stats (#16678) * Added implementation for the stats calculation for search and regular replica in shards Signed-off-by: Vinay Krishna Pudyodu * Updated changelog Signed-off-by: Vinay Krishna Pudyodu * Added unit tests for TransportSegmentReplicationStatsAction Signed-off-by: Vinay Krishna Pudyodu * fixed java style after running precommit locally Signed-off-by: Vinay Krishna Pudyodu * refined the test cases Signed-off-by: Vinay Krishna Pudyodu * fixed style issues Signed-off-by: Vinay Krishna Pudyodu * Made changes in the bytes to download calculation based on comments Signed-off-by: Vinay Krishna Pudyodu * added addReplicaStats method to SegmentReplicationPerGroupStats Signed-off-by: Vinay Krishna Pudyodu * fixed style issues Signed-off-by: Vinay Krishna Pudyodu * Fixed issue with immutable set Signed-off-by: Vinay Krishna Pudyodu * Fixed PR comments and moved the integration tests to separate module Signed-off-by: Vinay Krishna Pudyodu * Fixed failing integ tests Signed-off-by: Vinay Krishna Pudyodu * Fixed failing integ test Signed-off-by: Vinay Krishna Pudyodu * fixed some comments for PR Signed-off-by: Vinay Krishna Pudyodu * fixed failing tests Signed-off-by: Vinay Krishna Pudyodu --------- Signed-off-by: Vinay Krishna Pudyodu --- CHANGELOG.md | 1 + .../SearchReplicaReplicationIT.java | 49 ++ ...ransportSegmentReplicationStatsAction.java | 117 +++- ...ortSegmentReplicationStatsActionTests.java | 595 ++++++++++++++++++ 4 files changed, 744 insertions(+), 18 deletions(-) create mode 100644 server/src/test/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsActionTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index e4b56db662881..6b76a3d50cb0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)). - Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/)) - Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)). +- Add search replica stats to segment replication stats API ([#16678](https://github.com/opensearch-project/OpenSearch/pull/16678)) ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java index a1b512c326ac5..f660695af9965 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java @@ -8,14 +8,20 @@ package org.opensearch.indices.replication; +import org.opensearch.action.admin.indices.replication.SegmentReplicationStatsResponse; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.SegmentReplicationPerGroupStats; +import org.opensearch.index.SegmentReplicationShardStats; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.After; import org.junit.Before; import java.nio.file.Path; +import java.util.List; +import java.util.Set; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class SearchReplicaReplicationIT extends SegmentReplicationBaseIT { @@ -82,4 +88,47 @@ public void testReplication() throws Exception { waitForSearchableDocs(docCount, primary, replica); } + public void testSegmentReplicationStatsResponseWithSearchReplica() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final List nodes = internalCluster().startDataOnlyNodes(2); + createIndex( + INDEX_NAME, + Settings.builder() + .put("number_of_shards", 1) + .put("number_of_replicas", 0) + .put("number_of_search_only_replicas", 1) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build() + ); + ensureGreen(INDEX_NAME); + + final int docCount = 5; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + waitForSearchableDocs(docCount, nodes); + + SegmentReplicationStatsResponse segmentReplicationStatsResponse = dataNodeClient().admin() + .indices() + .prepareSegmentReplicationStats(INDEX_NAME) + .setDetailed(true) + .execute() + .actionGet(); + + // Verify the number of indices + assertEquals(1, segmentReplicationStatsResponse.getReplicationStats().size()); + // Verify total shards + assertEquals(2, segmentReplicationStatsResponse.getTotalShards()); + // Verify the number of primary shards + assertEquals(1, segmentReplicationStatsResponse.getReplicationStats().get(INDEX_NAME).size()); + + SegmentReplicationPerGroupStats perGroupStats = segmentReplicationStatsResponse.getReplicationStats().get(INDEX_NAME).get(0); + Set replicaStats = perGroupStats.getReplicaStats(); + // Verify the number of replica stats + assertEquals(1, replicaStats.size()); + for (SegmentReplicationShardStats replicaStat : replicaStats) { + assertNotNull(replicaStat.getCurrentReplicationState()); + } + } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsAction.java b/server/src/main/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsAction.java index fc97d67c6c3af..44408c5043fcf 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsAction.java @@ -21,7 +21,6 @@ import org.opensearch.core.action.support.DefaultShardOperationFailedException; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.index.shard.ShardId; -import org.opensearch.index.IndexService; import org.opensearch.index.SegmentReplicationPerGroupStats; import org.opensearch.index.SegmentReplicationPressureService; import org.opensearch.index.SegmentReplicationShardStats; @@ -38,7 +37,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Transport action for shard segment replication operation. This transport action does not actually @@ -96,11 +97,11 @@ protected SegmentReplicationStatsResponse newResponse( ) { String[] shards = request.shards(); final List shardsToFetch = Arrays.stream(shards).map(Integer::valueOf).collect(Collectors.toList()); - // organize replica responses by allocationId. final Map replicaStats = new HashMap<>(); // map of index name to list of replication group stats. final Map> primaryStats = new HashMap<>(); + for (SegmentReplicationShardStatsResponse response : responses) { if (response != null) { if (response.getReplicaStats() != null) { @@ -109,6 +110,7 @@ protected SegmentReplicationStatsResponse newResponse( replicaStats.putIfAbsent(shardRouting.allocationId().getId(), response.getReplicaStats()); } } + if (response.getPrimaryStats() != null) { final ShardId shardId = response.getPrimaryStats().getShardId(); if (shardsToFetch.isEmpty() || shardsToFetch.contains(shardId.getId())) { @@ -126,15 +128,20 @@ protected SegmentReplicationStatsResponse newResponse( } } } - // combine the replica stats to the shard stat entry in each group. - for (Map.Entry> entry : primaryStats.entrySet()) { - for (SegmentReplicationPerGroupStats group : entry.getValue()) { - for (SegmentReplicationShardStats replicaStat : group.getReplicaStats()) { - replicaStat.setCurrentReplicationState(replicaStats.getOrDefault(replicaStat.getAllocationId(), null)); - } - } - } - return new SegmentReplicationStatsResponse(totalShards, successfulShards, failedShards, primaryStats, shardFailures); + + Map> replicationStats = primaryStats.entrySet() + .stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + entry -> entry.getValue() + .stream() + .map(groupStats -> updateGroupStats(groupStats, replicaStats)) + .collect(Collectors.toList()) + ) + ); + + return new SegmentReplicationStatsResponse(totalShards, successfulShards, failedShards, replicationStats, shardFailures); } @Override @@ -144,9 +151,8 @@ protected SegmentReplicationStatsRequest readRequestFrom(StreamInput in) throws @Override protected SegmentReplicationShardStatsResponse shardOperation(SegmentReplicationStatsRequest request, ShardRouting shardRouting) { - IndexService indexService = indicesService.indexServiceSafe(shardRouting.shardId().getIndex()); - IndexShard indexShard = indexService.getShard(shardRouting.shardId().id()); ShardId shardId = shardRouting.shardId(); + IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShard(shardId.id()); if (indexShard.indexSettings().isSegRepEnabledOrRemoteNode() == false) { return null; @@ -156,11 +162,7 @@ protected SegmentReplicationShardStatsResponse shardOperation(SegmentReplication return new SegmentReplicationShardStatsResponse(pressureService.getStatsForShard(indexShard)); } - // return information about only on-going segment replication events. - if (request.activeOnly()) { - return new SegmentReplicationShardStatsResponse(targetService.getOngoingEventSegmentReplicationState(shardId)); - } - return new SegmentReplicationShardStatsResponse(targetService.getSegmentReplicationState(shardId)); + return new SegmentReplicationShardStatsResponse(getSegmentReplicationState(shardId, request.activeOnly())); } @Override @@ -181,4 +183,83 @@ protected ClusterBlockException checkRequestBlock( ) { return state.blocks().indicesBlockedException(ClusterBlockLevel.METADATA_READ, concreteIndices); } + + private SegmentReplicationPerGroupStats updateGroupStats( + SegmentReplicationPerGroupStats groupStats, + Map replicaStats + ) { + // Update the SegmentReplicationState for each of the replicas + Set updatedReplicaStats = groupStats.getReplicaStats() + .stream() + .peek(replicaStat -> replicaStat.setCurrentReplicationState(replicaStats.getOrDefault(replicaStat.getAllocationId(), null))) + .collect(Collectors.toSet()); + + // Compute search replica stats + Set searchReplicaStats = computeSearchReplicaStats(groupStats.getShardId(), replicaStats); + + // Combine ReplicaStats and SearchReplicaStats + Set combinedStats = Stream.concat(updatedReplicaStats.stream(), searchReplicaStats.stream()) + .collect(Collectors.toSet()); + + return new SegmentReplicationPerGroupStats(groupStats.getShardId(), combinedStats, groupStats.getRejectedRequestCount()); + } + + private Set computeSearchReplicaStats( + ShardId shardId, + Map replicaStats + ) { + return replicaStats.values() + .stream() + .filter(segmentReplicationState -> segmentReplicationState.getShardRouting().shardId().equals(shardId)) + .filter(segmentReplicationState -> segmentReplicationState.getShardRouting().isSearchOnly()) + .map(segmentReplicationState -> { + ShardRouting shardRouting = segmentReplicationState.getShardRouting(); + SegmentReplicationShardStats segmentReplicationStats = computeSegmentReplicationShardStats(shardRouting); + segmentReplicationStats.setCurrentReplicationState(segmentReplicationState); + return segmentReplicationStats; + }) + .collect(Collectors.toSet()); + } + + SegmentReplicationShardStats computeSegmentReplicationShardStats(ShardRouting shardRouting) { + ShardId shardId = shardRouting.shardId(); + SegmentReplicationState completedSegmentReplicationState = targetService.getlatestCompletedEventSegmentReplicationState(shardId); + SegmentReplicationState ongoingSegmentReplicationState = targetService.getOngoingEventSegmentReplicationState(shardId); + + return new SegmentReplicationShardStats( + shardRouting.allocationId().getId(), + 0, + calculateBytesRemainingToReplicate(ongoingSegmentReplicationState), + 0, + getCurrentReplicationLag(ongoingSegmentReplicationState), + getLastCompletedReplicationLag(completedSegmentReplicationState) + ); + } + + private SegmentReplicationState getSegmentReplicationState(ShardId shardId, boolean isActiveOnly) { + if (isActiveOnly) { + return targetService.getOngoingEventSegmentReplicationState(shardId); + } else { + return targetService.getSegmentReplicationState(shardId); + } + } + + private long calculateBytesRemainingToReplicate(SegmentReplicationState ongoingSegmentReplicationState) { + if (ongoingSegmentReplicationState == null) { + return 0; + } + return ongoingSegmentReplicationState.getIndex() + .fileDetails() + .stream() + .mapToLong(index -> index.length() - index.recovered()) + .sum(); + } + + private long getCurrentReplicationLag(SegmentReplicationState ongoingSegmentReplicationState) { + return ongoingSegmentReplicationState != null ? ongoingSegmentReplicationState.getTimer().time() : 0; + } + + private long getLastCompletedReplicationLag(SegmentReplicationState completedSegmentReplicationState) { + return completedSegmentReplicationState != null ? completedSegmentReplicationState.getTimer().time() : 0; + } } diff --git a/server/src/test/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsActionTests.java b/server/src/test/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsActionTests.java new file mode 100644 index 0000000000000..ea455d607f058 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/indices/replication/TransportSegmentReplicationStatsActionTests.java @@ -0,0 +1,595 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.indices.replication; + +import org.opensearch.Version; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlock; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.routing.AllocationId; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardIterator; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardsIterator; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.action.support.DefaultShardOperationFailedException; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.SegmentReplicationPerGroupStats; +import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.SegmentReplicationShardStats; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.SegmentReplicationState; +import org.opensearch.indices.replication.SegmentReplicationTargetService; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; +import org.opensearch.indices.replication.common.ReplicationTimer; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.transport.TransportService; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TransportSegmentReplicationStatsActionTests extends OpenSearchTestCase { + @Mock + private ClusterService clusterService; + @Mock + private TransportService transportService; + @Mock + private IndicesService indicesService; + @Mock + private SegmentReplicationTargetService targetService; + @Mock + private ActionFilters actionFilters; + @Mock + private IndexNameExpressionResolver indexNameExpressionResolver; + @Mock + private SegmentReplicationPressureService pressureService; + @Mock + private IndexShard indexShard; + @Mock + private IndexService indexService; + + private TransportSegmentReplicationStatsAction action; + + @Before + public void setUp() throws Exception { + MockitoAnnotations.openMocks(this); + super.setUp(); + action = new TransportSegmentReplicationStatsAction( + clusterService, + transportService, + indicesService, + targetService, + actionFilters, + indexNameExpressionResolver, + pressureService + ); + } + + public void testShardReturnsAllTheShardsForTheIndex() { + SegmentReplicationStatsRequest segmentReplicationStatsRequest = mock(SegmentReplicationStatsRequest.class); + String[] concreteIndices = new String[] { "test-index" }; + ClusterState clusterState = mock(ClusterState.class); + RoutingTable routingTables = mock(RoutingTable.class); + ShardsIterator shardsIterator = mock(ShardIterator.class); + + when(clusterState.routingTable()).thenReturn(routingTables); + when(routingTables.allShardsIncludingRelocationTargets(any())).thenReturn(shardsIterator); + assertEquals(shardsIterator, action.shards(clusterState, segmentReplicationStatsRequest, concreteIndices)); + } + + public void testShardOperationWithPrimaryShard() { + ShardRouting shardRouting = mock(ShardRouting.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + SegmentReplicationStatsRequest request = new SegmentReplicationStatsRequest(); + + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.primary()).thenReturn(true); + when(indicesService.indexServiceSafe(shardId.getIndex())).thenReturn(indexService); + when(indexService.getShard(shardId.id())).thenReturn(indexShard); + when(indexShard.indexSettings()).thenReturn(createIndexSettingsWithSegRepEnabled()); + + SegmentReplicationShardStatsResponse response = action.shardOperation(request, shardRouting); + + assertNotNull(response); + verify(pressureService).getStatsForShard(any()); + } + + public void testShardOperationWithReplicaShard() { + ShardRouting shardRouting = mock(ShardRouting.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + SegmentReplicationStatsRequest request = new SegmentReplicationStatsRequest(); + request.activeOnly(false); + SegmentReplicationState completedSegmentReplicationState = mock(SegmentReplicationState.class); + + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.primary()).thenReturn(false); + when(indicesService.indexServiceSafe(shardId.getIndex())).thenReturn(indexService); + when(indexService.getShard(shardId.id())).thenReturn(indexShard); + when(indexShard.indexSettings()).thenReturn(createIndexSettingsWithSegRepEnabled()); + when(targetService.getSegmentReplicationState(shardId)).thenReturn(completedSegmentReplicationState); + + SegmentReplicationShardStatsResponse response = action.shardOperation(request, shardRouting); + + assertNotNull(response); + assertNull(response.getPrimaryStats()); + assertNotNull(response.getReplicaStats()); + verify(targetService).getSegmentReplicationState(shardId); + } + + public void testShardOperationWithReplicaShardActiveOnly() { + ShardRouting shardRouting = mock(ShardRouting.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + SegmentReplicationStatsRequest request = new SegmentReplicationStatsRequest(); + request.activeOnly(true); + SegmentReplicationState onGoingSegmentReplicationState = mock(SegmentReplicationState.class); + + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.primary()).thenReturn(false); + when(indicesService.indexServiceSafe(shardId.getIndex())).thenReturn(indexService); + when(indexService.getShard(shardId.id())).thenReturn(indexShard); + when(indexShard.indexSettings()).thenReturn(createIndexSettingsWithSegRepEnabled()); + when(targetService.getOngoingEventSegmentReplicationState(shardId)).thenReturn(onGoingSegmentReplicationState); + + SegmentReplicationShardStatsResponse response = action.shardOperation(request, shardRouting); + + assertNotNull(response); + assertNull(response.getPrimaryStats()); + assertNotNull(response.getReplicaStats()); + verify(targetService).getOngoingEventSegmentReplicationState(shardId); + } + + public void testComputeBytesRemainingToReplicateWhenCompletedAndOngoingStateNotNull() { + ShardRouting shardRouting = mock(ShardRouting.class); + SegmentReplicationState completedSegmentReplicationState = mock(SegmentReplicationState.class); + SegmentReplicationState onGoingSegmentReplicationState = mock(SegmentReplicationState.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + AllocationId allocationId = AllocationId.newInitializing(); + ReplicationTimer replicationTimerCompleted = mock(ReplicationTimer.class); + ReplicationTimer replicationTimerOngoing = mock(ReplicationTimer.class); + long time1 = 10; + long time2 = 15; + ReplicationLuceneIndex replicationLuceneIndex = new ReplicationLuceneIndex(); + replicationLuceneIndex.addFileDetail("name1", 10, false); + replicationLuceneIndex.addFileDetail("name2", 15, false); + + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.allocationId()).thenReturn(allocationId); + when(targetService.getlatestCompletedEventSegmentReplicationState(shardId)).thenReturn(completedSegmentReplicationState); + when(targetService.getOngoingEventSegmentReplicationState(shardId)).thenReturn(onGoingSegmentReplicationState); + when(completedSegmentReplicationState.getTimer()).thenReturn(replicationTimerCompleted); + when(onGoingSegmentReplicationState.getTimer()).thenReturn(replicationTimerOngoing); + when(replicationTimerOngoing.time()).thenReturn(time1); + when(replicationTimerCompleted.time()).thenReturn(time2); + when(onGoingSegmentReplicationState.getIndex()).thenReturn(replicationLuceneIndex); + + SegmentReplicationShardStats segmentReplicationShardStats = action.computeSegmentReplicationShardStats(shardRouting); + + assertNotNull(segmentReplicationShardStats); + assertEquals(25, segmentReplicationShardStats.getBytesBehindCount()); + assertEquals(10, segmentReplicationShardStats.getCurrentReplicationLagMillis()); + assertEquals(15, segmentReplicationShardStats.getLastCompletedReplicationTimeMillis()); + + verify(targetService).getlatestCompletedEventSegmentReplicationState(shardId); + verify(targetService).getOngoingEventSegmentReplicationState(shardId); + } + + public void testCalculateBytesRemainingToReplicateWhenNoCompletedState() { + ShardRouting shardRouting = mock(ShardRouting.class); + SegmentReplicationState onGoingSegmentReplicationState = mock(SegmentReplicationState.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + AllocationId allocationId = AllocationId.newInitializing(); + ReplicationTimer replicationTimerOngoing = mock(ReplicationTimer.class); + long time1 = 10; + ReplicationLuceneIndex replicationLuceneIndex = new ReplicationLuceneIndex(); + replicationLuceneIndex.addFileDetail("name1", 10, false); + replicationLuceneIndex.addFileDetail("name2", 15, false); + + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.allocationId()).thenReturn(allocationId); + when(targetService.getOngoingEventSegmentReplicationState(shardId)).thenReturn(onGoingSegmentReplicationState); + when(onGoingSegmentReplicationState.getTimer()).thenReturn(replicationTimerOngoing); + when(replicationTimerOngoing.time()).thenReturn(time1); + when(onGoingSegmentReplicationState.getIndex()).thenReturn(replicationLuceneIndex); + + SegmentReplicationShardStats segmentReplicationShardStats = action.computeSegmentReplicationShardStats(shardRouting); + + assertNotNull(segmentReplicationShardStats); + assertEquals(25, segmentReplicationShardStats.getBytesBehindCount()); + assertEquals(10, segmentReplicationShardStats.getCurrentReplicationLagMillis()); + assertEquals(0, segmentReplicationShardStats.getLastCompletedReplicationTimeMillis()); + + verify(targetService).getlatestCompletedEventSegmentReplicationState(shardId); + verify(targetService).getOngoingEventSegmentReplicationState(shardId); + } + + public void testCalculateBytesRemainingToReplicateWhenNoOnGoingState() { + ShardRouting shardRouting = mock(ShardRouting.class); + SegmentReplicationState completedSegmentReplicationState = mock(SegmentReplicationState.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + AllocationId allocationId = AllocationId.newInitializing(); + ReplicationTimer replicationTimerCompleted = mock(ReplicationTimer.class); + long time2 = 15; + + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.allocationId()).thenReturn(allocationId); + when(targetService.getlatestCompletedEventSegmentReplicationState(shardId)).thenReturn(completedSegmentReplicationState); + when(completedSegmentReplicationState.getTimer()).thenReturn(replicationTimerCompleted); + when(replicationTimerCompleted.time()).thenReturn(time2); + + SegmentReplicationShardStats segmentReplicationShardStats = action.computeSegmentReplicationShardStats(shardRouting); + + assertNotNull(segmentReplicationShardStats); + assertEquals(0, segmentReplicationShardStats.getBytesBehindCount()); + assertEquals(0, segmentReplicationShardStats.getCurrentReplicationLagMillis()); + assertEquals(15, segmentReplicationShardStats.getLastCompletedReplicationTimeMillis()); + + verify(targetService).getlatestCompletedEventSegmentReplicationState(shardId); + verify(targetService).getOngoingEventSegmentReplicationState(shardId); + } + + public void testCalculateBytesRemainingToReplicateWhenNoCompletedAndOngoingState() { + ShardRouting shardRouting = mock(ShardRouting.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + AllocationId allocationId = AllocationId.newInitializing(); + when(shardRouting.shardId()).thenReturn(shardId); + when(shardRouting.allocationId()).thenReturn(allocationId); + + SegmentReplicationShardStats segmentReplicationShardStats = action.computeSegmentReplicationShardStats(shardRouting); + + assertNotNull(segmentReplicationShardStats); + assertEquals(0, segmentReplicationShardStats.getBytesBehindCount()); + assertEquals(0, segmentReplicationShardStats.getCurrentReplicationLagMillis()); + assertEquals(0, segmentReplicationShardStats.getLastCompletedReplicationTimeMillis()); + + verify(targetService).getlatestCompletedEventSegmentReplicationState(shardId); + verify(targetService).getOngoingEventSegmentReplicationState(shardId); + } + + public void testNewResponseWhenAllReplicasReturnResponseCombinesTheResults() { + SegmentReplicationStatsRequest request = new SegmentReplicationStatsRequest(); + List shardFailures = new ArrayList<>(); + String[] shards = { "0", "1" }; + request.shards(shards); + + int totalShards = 6; + int successfulShards = 6; + int failedShard = 0; + String allocIdOne = "allocIdOne"; + String allocIdTwo = "allocIdTwo"; + String allocIdThree = "allocIdThree"; + String allocIdFour = "allocIdFour"; + String allocIdFive = "allocIdFive"; + String allocIdSix = "allocIdSix"; + + ShardId shardId0 = mock(ShardId.class); + ShardRouting primary0 = mock(ShardRouting.class); + ShardRouting replica0 = mock(ShardRouting.class); + ShardRouting searchReplica0 = mock(ShardRouting.class); + + ShardId shardId1 = mock(ShardId.class); + ShardRouting primary1 = mock(ShardRouting.class); + ShardRouting replica1 = mock(ShardRouting.class); + ShardRouting searchReplica1 = mock(ShardRouting.class); + + when(shardId0.getId()).thenReturn(0); + when(shardId0.getIndexName()).thenReturn("test-index-1"); + when(primary0.shardId()).thenReturn(shardId0); + when(replica0.shardId()).thenReturn(shardId0); + when(searchReplica0.shardId()).thenReturn(shardId0); + + when(shardId1.getId()).thenReturn(1); + when(shardId1.getIndexName()).thenReturn("test-index-1"); + when(primary1.shardId()).thenReturn(shardId1); + when(replica1.shardId()).thenReturn(shardId1); + when(searchReplica1.shardId()).thenReturn(shardId1); + + AllocationId allocationIdOne = mock(AllocationId.class); + AllocationId allocationIdTwo = mock(AllocationId.class); + AllocationId allocationIdThree = mock(AllocationId.class); + AllocationId allocationIdFour = mock(AllocationId.class); + AllocationId allocationIdFive = mock(AllocationId.class); + AllocationId allocationIdSix = mock(AllocationId.class); + + when(allocationIdOne.getId()).thenReturn(allocIdOne); + when(allocationIdTwo.getId()).thenReturn(allocIdTwo); + when(allocationIdThree.getId()).thenReturn(allocIdThree); + when(allocationIdFour.getId()).thenReturn(allocIdFour); + when(allocationIdFive.getId()).thenReturn(allocIdFive); + when(allocationIdSix.getId()).thenReturn(allocIdSix); + when(primary0.allocationId()).thenReturn(allocationIdOne); + when(replica0.allocationId()).thenReturn(allocationIdTwo); + when(searchReplica0.allocationId()).thenReturn(allocationIdThree); + when(primary1.allocationId()).thenReturn(allocationIdFour); + when(replica1.allocationId()).thenReturn(allocationIdFive); + when(searchReplica1.allocationId()).thenReturn(allocationIdSix); + + when(primary0.isSearchOnly()).thenReturn(false); + when(replica0.isSearchOnly()).thenReturn(false); + when(searchReplica0.isSearchOnly()).thenReturn(true); + when(primary1.isSearchOnly()).thenReturn(false); + when(replica1.isSearchOnly()).thenReturn(false); + when(searchReplica1.isSearchOnly()).thenReturn(true); + + Set segmentReplicationShardStats0 = new HashSet<>(); + SegmentReplicationShardStats segmentReplicationShardStatsOfReplica0 = new SegmentReplicationShardStats(allocIdTwo, 0, 0, 0, 0, 0); + segmentReplicationShardStats0.add(segmentReplicationShardStatsOfReplica0); + + Set segmentReplicationShardStats1 = new HashSet<>(); + SegmentReplicationShardStats segmentReplicationShardStatsOfReplica1 = new SegmentReplicationShardStats(allocIdFive, 0, 0, 0, 0, 0); + segmentReplicationShardStats1.add(segmentReplicationShardStatsOfReplica1); + + SegmentReplicationPerGroupStats segmentReplicationPerGroupStats0 = new SegmentReplicationPerGroupStats( + shardId0, + segmentReplicationShardStats0, + 0 + ); + + SegmentReplicationPerGroupStats segmentReplicationPerGroupStats1 = new SegmentReplicationPerGroupStats( + shardId1, + segmentReplicationShardStats1, + 0 + ); + + SegmentReplicationState segmentReplicationState0 = mock(SegmentReplicationState.class); + SegmentReplicationState searchReplicaSegmentReplicationState0 = mock(SegmentReplicationState.class); + SegmentReplicationState segmentReplicationState1 = mock(SegmentReplicationState.class); + SegmentReplicationState searchReplicaSegmentReplicationState1 = mock(SegmentReplicationState.class); + + when(segmentReplicationState0.getShardRouting()).thenReturn(replica0); + when(searchReplicaSegmentReplicationState0.getShardRouting()).thenReturn(searchReplica0); + when(segmentReplicationState1.getShardRouting()).thenReturn(replica1); + when(searchReplicaSegmentReplicationState1.getShardRouting()).thenReturn(searchReplica1); + + List responses = List.of( + new SegmentReplicationShardStatsResponse(segmentReplicationPerGroupStats0), + new SegmentReplicationShardStatsResponse(segmentReplicationState0), + new SegmentReplicationShardStatsResponse(searchReplicaSegmentReplicationState0), + new SegmentReplicationShardStatsResponse(segmentReplicationPerGroupStats1), + new SegmentReplicationShardStatsResponse(segmentReplicationState1), + new SegmentReplicationShardStatsResponse(searchReplicaSegmentReplicationState1) + ); + + SegmentReplicationStatsResponse response = action.newResponse( + request, + totalShards, + successfulShards, + failedShard, + responses, + shardFailures, + ClusterState.EMPTY_STATE + ); + + List responseStats = response.getReplicationStats().get("test-index-1"); + SegmentReplicationPerGroupStats primStats0 = responseStats.get(0); + Set replicaStats0 = primStats0.getReplicaStats(); + assertEquals(2, replicaStats0.size()); + for (SegmentReplicationShardStats replicaStat : replicaStats0) { + if (replicaStat.getAllocationId().equals(allocIdTwo)) { + assertEquals(segmentReplicationState0, replicaStat.getCurrentReplicationState()); + } + + if (replicaStat.getAllocationId().equals(allocIdThree)) { + assertEquals(searchReplicaSegmentReplicationState0, replicaStat.getCurrentReplicationState()); + } + } + + SegmentReplicationPerGroupStats primStats1 = responseStats.get(1); + Set replicaStats1 = primStats1.getReplicaStats(); + assertEquals(2, replicaStats1.size()); + for (SegmentReplicationShardStats replicaStat : replicaStats1) { + if (replicaStat.getAllocationId().equals(allocIdFive)) { + assertEquals(segmentReplicationState1, replicaStat.getCurrentReplicationState()); + } + + if (replicaStat.getAllocationId().equals(allocIdSix)) { + assertEquals(searchReplicaSegmentReplicationState1, replicaStat.getCurrentReplicationState()); + } + } + } + + public void testNewResponseWhenShardsToFetchEmptyAndResponsesContainsNull() { + SegmentReplicationStatsRequest request = new SegmentReplicationStatsRequest(); + List shardFailures = new ArrayList<>(); + String[] shards = {}; + request.shards(shards); + + int totalShards = 3; + int successfulShards = 3; + int failedShard = 0; + String allocIdOne = "allocIdOne"; + String allocIdTwo = "allocIdTwo"; + ShardId shardIdOne = mock(ShardId.class); + ShardId shardIdTwo = mock(ShardId.class); + ShardId shardIdThree = mock(ShardId.class); + ShardRouting shardRoutingOne = mock(ShardRouting.class); + ShardRouting shardRoutingTwo = mock(ShardRouting.class); + ShardRouting shardRoutingThree = mock(ShardRouting.class); + when(shardIdOne.getId()).thenReturn(1); + when(shardIdTwo.getId()).thenReturn(2); + when(shardIdThree.getId()).thenReturn(3); + when(shardRoutingOne.shardId()).thenReturn(shardIdOne); + when(shardRoutingTwo.shardId()).thenReturn(shardIdTwo); + when(shardRoutingThree.shardId()).thenReturn(shardIdThree); + AllocationId allocationId = mock(AllocationId.class); + when(allocationId.getId()).thenReturn(allocIdOne); + when(shardRoutingTwo.allocationId()).thenReturn(allocationId); + when(shardIdOne.getIndexName()).thenReturn("test-index"); + + Set segmentReplicationShardStats = new HashSet<>(); + SegmentReplicationShardStats segmentReplicationShardStatsOfReplica = new SegmentReplicationShardStats(allocIdOne, 0, 0, 0, 0, 0); + segmentReplicationShardStats.add(segmentReplicationShardStatsOfReplica); + SegmentReplicationPerGroupStats segmentReplicationPerGroupStats = new SegmentReplicationPerGroupStats( + shardIdOne, + segmentReplicationShardStats, + 0 + ); + + SegmentReplicationState segmentReplicationState = mock(SegmentReplicationState.class); + SegmentReplicationShardStats segmentReplicationShardStatsFromSearchReplica = mock(SegmentReplicationShardStats.class); + when(segmentReplicationShardStatsFromSearchReplica.getAllocationId()).thenReturn("alloc2"); + when(segmentReplicationState.getShardRouting()).thenReturn(shardRoutingTwo); + + List responses = new ArrayList<>(); + responses.add(null); + responses.add(new SegmentReplicationShardStatsResponse(segmentReplicationPerGroupStats)); + responses.add(new SegmentReplicationShardStatsResponse(segmentReplicationState)); + + SegmentReplicationStatsResponse response = action.newResponse( + request, + totalShards, + successfulShards, + failedShard, + responses, + shardFailures, + ClusterState.EMPTY_STATE + ); + + List responseStats = response.getReplicationStats().get("test-index"); + SegmentReplicationPerGroupStats primStats = responseStats.get(0); + Set segRpShardStatsSet = primStats.getReplicaStats(); + + for (SegmentReplicationShardStats segRpShardStats : segRpShardStatsSet) { + if (segRpShardStats.getAllocationId().equals(allocIdOne)) { + assertEquals(segmentReplicationState, segRpShardStats.getCurrentReplicationState()); + } + + if (segRpShardStats.getAllocationId().equals(allocIdTwo)) { + assertEquals(segmentReplicationShardStatsFromSearchReplica, segRpShardStats); + } + } + } + + public void testShardOperationWithSegRepDisabled() { + ShardRouting shardRouting = mock(ShardRouting.class); + ShardId shardId = new ShardId(new Index("test-index", "test-uuid"), 0); + SegmentReplicationStatsRequest request = new SegmentReplicationStatsRequest(); + + when(shardRouting.shardId()).thenReturn(shardId); + when(indicesService.indexServiceSafe(shardId.getIndex())).thenReturn(indexService); + when(indexService.getShard(shardId.id())).thenReturn(indexShard); + when(indexShard.indexSettings()).thenReturn(createIndexSettingsWithSegRepDisabled()); + + SegmentReplicationShardStatsResponse response = action.shardOperation(request, shardRouting); + + assertNull(response); + } + + public void testGlobalBlockCheck() { + ClusterBlock writeClusterBlock = new ClusterBlock( + 1, + "uuid", + "", + true, + true, + true, + RestStatus.OK, + EnumSet.of(ClusterBlockLevel.METADATA_WRITE) + ); + + ClusterBlock readClusterBlock = new ClusterBlock( + 1, + "uuid", + "", + true, + true, + true, + RestStatus.OK, + EnumSet.of(ClusterBlockLevel.METADATA_READ) + ); + + ClusterBlocks.Builder builder = ClusterBlocks.builder(); + builder.addGlobalBlock(writeClusterBlock); + ClusterState metadataWriteBlockedState = ClusterState.builder(ClusterState.EMPTY_STATE).blocks(builder).build(); + assertNull(action.checkGlobalBlock(metadataWriteBlockedState, new SegmentReplicationStatsRequest())); + + builder = ClusterBlocks.builder(); + builder.addGlobalBlock(readClusterBlock); + ClusterState metadataReadBlockedState = ClusterState.builder(ClusterState.EMPTY_STATE).blocks(builder).build(); + assertNotNull(action.checkGlobalBlock(metadataReadBlockedState, new SegmentReplicationStatsRequest())); + } + + public void testIndexBlockCheck() { + ClusterBlock writeClusterBlock = new ClusterBlock( + 1, + "uuid", + "", + true, + true, + true, + RestStatus.OK, + EnumSet.of(ClusterBlockLevel.METADATA_WRITE) + ); + + ClusterBlock readClusterBlock = new ClusterBlock( + 1, + "uuid", + "", + true, + true, + true, + RestStatus.OK, + EnumSet.of(ClusterBlockLevel.METADATA_READ) + ); + + String indexName = "test"; + ClusterBlocks.Builder builder = ClusterBlocks.builder(); + builder.addIndexBlock(indexName, writeClusterBlock); + ClusterState metadataWriteBlockedState = ClusterState.builder(ClusterState.EMPTY_STATE).blocks(builder).build(); + assertNull(action.checkRequestBlock(metadataWriteBlockedState, new SegmentReplicationStatsRequest(), new String[] { indexName })); + + builder = ClusterBlocks.builder(); + builder.addIndexBlock(indexName, readClusterBlock); + ClusterState metadataReadBlockedState = ClusterState.builder(ClusterState.EMPTY_STATE).blocks(builder).build(); + assertNotNull(action.checkRequestBlock(metadataReadBlockedState, new SegmentReplicationStatsRequest(), new String[] { indexName })); + } + + private IndexSettings createIndexSettingsWithSegRepEnabled() { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .build(); + + return new IndexSettings(IndexMetadata.builder("test").settings(settings).build(), settings); + } + + private IndexSettings createIndexSettingsWithSegRepDisabled() { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .build(); + return new IndexSettings(IndexMetadata.builder("test").settings(settings).build(), settings); + } +}