From 6641ef8a0a39d86aadd272936cae25a406eb3485 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Fri, 20 Oct 2023 18:06:40 +0530 Subject: [PATCH 01/13] Restore remote index shards with ExistingStoreRecoverySource after restore from remote state (#10665) * Restore remote index shards with ExistingStoreRecoverySource after restore from remote state Signed-off-by: bansvaru --- .../remotestore/BaseRemoteStoreRestoreIT.java | 5 +- .../RemoteStoreClusterStateRestoreIT.java | 28 ++- .../cluster/routing/IndexRoutingTable.java | 4 +- .../gateway/ClusterStateUpdaters.java | 17 +- .../recovery/RemoteStoreRestoreService.java | 30 +-- .../gateway/ClusterStateUpdatersTests.java | 237 ++---------------- 6 files changed, 53 insertions(+), 268 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index b8481610869e6..99c5d7fb2bae7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -46,7 +46,10 @@ protected Collection> nodePlugins() { } protected void restore(String... indices) { - boolean restoreAllShards = randomBoolean(); + restore(randomBoolean(), indices); + } + + protected void restore(boolean restoreAllShards, String... 
indices) { if (restoreAllShards) { assertAcked(client().admin().indices().prepareClose(indices)); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 3a3e293de9b13..c2cb7cc60f152 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -65,6 +65,13 @@ private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().startDataOnlyNodes(dataNodeCount); } + protected void verifyRedIndicesAndTriggerRestore(Map indexStats, String indexName, boolean indexMoreDocs) + throws Exception { + ensureRed(indexName); + restore(false, indexName); + verifyRestoredData(indexStats, indexName, indexMoreDocs); + } + public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -83,7 +90,7 @@ public void testFullClusterRestore() throws Exception { // Step - 3 Trigger full cluster restore and validate validateMetadata(List.of(INDEX_NAME)); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } public void testFullClusterRestoreMultipleIndices() throws Exception { @@ -112,8 +119,8 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { // Step - 3 Trigger full cluster restore validateMetadata(List.of(INDEX_NAME, secondIndexName)); - verifyRestoredData(indexStats, INDEX_NAME); - verifyRestoredData(indexStats2, secondIndexName, false); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + verifyRedIndicesAndTriggerRestore(indexStats2, secondIndexName, false); assertTrue(INDEX_READ_ONLY_SETTING.get(clusterService().state().metadata().index(secondIndexName).getSettings())); 
assertThrows(ClusterBlockException.class, () -> indexSingleDoc(secondIndexName)); // Test is complete @@ -181,7 +188,7 @@ public void testRemoteStateFullRestart() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed"; validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } private void validateMetadata(List indexNames) { @@ -246,19 +253,18 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { // Step - 3 Trigger full cluster restore and validate // validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME, false); - - // validate global metadata restored - verifyRestoredRepositories(); - verifyRestoredIndexTemplate(); assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); - // Test is complete - // Remote the cluster read only block to ensure proper cleanup updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), false).build()); assertFalse(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); + + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + + // validate global metadata restored + verifyRestoredRepositories(); + verifyRestoredIndexTemplate(); } private void registerCustomRepository() { diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index b12698c8a320e..d77d44580798a 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ 
b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -466,12 +466,12 @@ public Builder initializeAsRemoteStoreRestore( } for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) { ShardId shardId = new ShardId(index, shardNumber); - if (forceRecoverAllPrimaries == false && indexShardRoutingTableMap.containsKey(shardId) == false) { + if (indexShardRoutingTableMap.containsKey(shardId) == false) { throw new IllegalStateException("IndexShardRoutingTable is not present for shardId: " + shardId); } IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingTableMap.get(shardId); - if (forceRecoverAllPrimaries || indexShardRoutingTable == null || indexShardRoutingTable.primaryShard().unassigned()) { + if (forceRecoverAllPrimaries || indexShardRoutingTable.primaryShard().unassigned()) { // Primary shard to be recovered from remote store. indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, true, recoverySource, unassignedInfo)); // All the replica shards to be recovered from peer recovery. 
diff --git a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java index 4c562b348f141..1563ac84bdd1c 100644 --- a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java +++ b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java @@ -41,7 +41,6 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.common.settings.ClusterSettings; @@ -121,21 +120,7 @@ static ClusterState updateRoutingTable(final ClusterState state) { // initialize all index routing tables as empty final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(state.routingTable()); for (final IndexMetadata cursor : state.metadata().indices().values()) { - // Whether IndexMetadata is recovered from local disk or remote it doesn't matter to us at this point. - // We are only concerned about index data recovery here. Which is why we only check for remote store enabled and not for remote - // cluster state enabled. - if (cursor.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false) == false - || state.routingTable().hasIndex(cursor.getIndex()) == false - || state.routingTable() - .index(cursor.getIndex()) - .shardsMatchingPredicateCount( - shardRouting -> shardRouting.primary() - // We need to ensure atleast one of the primaries is being recovered from remote. 
- // This ensures we have gone through the RemoteStoreRestoreService and routing table is updated - && shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) == 0) { - routingTableBuilder.addAsRecovery(cursor); - } + routingTableBuilder.addAsRecovery(cursor); } // start with 0 based versions for routing table routingTableBuilder.version(0); diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index ac9cf35d1d8e5..6692d521b8f65 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -183,6 +183,7 @@ private RemoteRestoreResult executeRestore( final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); int totalShards = 0; + boolean metadataFromRemoteStore = false; ClusterState.Builder builder = ClusterState.builder(currentState); Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata()); ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()); @@ -190,7 +191,7 @@ private RemoteRestoreResult executeRestore( for (Map.Entry> indexMetadataEntry : indexMetadataMap.entrySet()) { String indexName = indexMetadataEntry.getKey(); IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); - boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); + metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); IndexMetadata updatedIndexMetadata = indexMetadata; if (metadataFromRemoteStore == false && restoreAllShards) { updatedIndexMetadata = IndexMetadata.builder(indexMetadata) @@ -204,27 +205,23 @@ private RemoteRestoreResult executeRestore( IndexId indexId = new IndexId(indexName, updatedIndexMetadata.getIndexUUID()); - Map indexShardRoutingTableMap = new HashMap<>(); if 
(metadataFromRemoteStore == false) { - indexShardRoutingTableMap = currentState.routingTable() + Map indexShardRoutingTableMap = currentState.routingTable() .index(indexName) .shards() .values() .stream() .collect(Collectors.toMap(IndexShardRoutingTable::shardId, Function.identity())); + + RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( + restoreUUID, + updatedIndexMetadata.getCreationVersion(), + indexId + ); + + rtBuilder.addAsRemoteStoreRestore(updatedIndexMetadata, recoverySource, indexShardRoutingTableMap, restoreAllShards); } - RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( - restoreUUID, - updatedIndexMetadata.getCreationVersion(), - indexId - ); - rtBuilder.addAsRemoteStoreRestore( - updatedIndexMetadata, - recoverySource, - indexShardRoutingTableMap, - restoreAllShards || metadataFromRemoteStore - ); blocks.updateBlocks(updatedIndexMetadata); mdBuilder.put(updatedIndexMetadata, true); indicesToBeRestored.add(indexName); @@ -239,7 +236,10 @@ private RemoteRestoreResult executeRestore( RoutingTable rt = rtBuilder.build(); ClusterState updatedState = builder.metadata(mdBuilder).blocks(blocks).routingTable(rt).build(); - return RemoteRestoreResult.build(restoreUUID, restoreInfo, allocationService.reroute(updatedState, "restored from remote store")); + if (metadataFromRemoteStore == false) { + updatedState = allocationService.reroute(updatedState, "restored from remote store"); + } + return RemoteRestoreResult.build(restoreUUID, restoreInfo, updatedState); } private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMetadata) { diff --git a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java index 9b3fd45245ef7..1c43bb565ef69 100644 --- a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java +++ 
b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java @@ -41,6 +41,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.UnassignedInfo; @@ -52,12 +53,14 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.repositories.IndexId; import org.opensearch.test.OpenSearchTestCase; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; @@ -275,7 +278,7 @@ public void testUpdateRoutingTable() { } } - public void testSkipRoutingTableUpdateWhenRemoteRecovery() { + public void testRoutingTableUpdateWhenRemoteStateRecovery() { final int numOfShards = randomIntBetween(1, 10); final IndexMetadata remoteMetadata = createIndexMetadata( @@ -286,7 +289,7 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { .build() ); - // Test remote index routing table is generated with ExistingStoreRecoverySource if no routing table is present + // Test remote index routing table is generated with ExistingStoreRecoverySource { final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) @@ -322,48 +325,14 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { } - // Test remote index routing table is overridden if recovery source is not RemoteStoreRecoverySource + // Test remote index routing table is overridden if recovery source is RemoteStoreRecoverySource { - 
IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsNew(remoteMetadata); final Index index = remoteMetadata.getIndex(); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .routingTable(new RoutingTable.Builder().add(remoteBuilderWithoutRemoteRecovery.build()).build()) - .build(); - assertTrue(initialState.routingTable().hasIndex(index)); - final ClusterState newState = updateRoutingTable(initialState); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - assertTrue(newState.routingTable().hasIndex(index)); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - - } - - // Test routing table update is skipped for a remote index - { + Map routingTableMap = new HashMap<>(); + for (int shardNumber = 0; shardNumber < remoteMetadata.getNumberOfShards(); shardNumber++) { + ShardId shardId = new ShardId(index, shardNumber); + routingTableMap.put(shardId, new IndexShardRoutingTable.Builder(new ShardId(remoteMetadata.getIndex(), 1)).build()); + } IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new 
IndexRoutingTable.Builder(remoteMetadata.getIndex()) .initializeAsRemoteStoreRestore( remoteMetadata, @@ -372,10 +341,9 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { remoteMetadata.getCreationVersion(), new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) ), - new HashMap<>(), + routingTableMap, true ); - final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) .metadata(Metadata.builder().put(remoteMetadata, false).build()) .routingTable(new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()).build()) @@ -387,205 +355,28 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - - } - - // Test reset routing table for 2 indices - one remote and one non remote. 
- // Routing table for non remote index should be updated and remote index routing table should remain intact - { - final IndexMetadata nonRemoteMetadata = createIndexMetadata( - "test-nonremote", - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards).build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder nonRemoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(nonRemoteMetadata.getIndex()) - .initializeAsNew(nonRemoteMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(nonRemoteMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(nonRemoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newNonRemoteIndexRoutingTable = newState.routingTable().index(nonRemoteMetadata.getIndex()); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> 
shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) ) ); assertEquals( numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - } - - // Test reset routing table for 2 indices, both remote backed but only once index has RemoteStoreRecoverySource. 
- // Routing table for only remote index without RemoteStoreRecoverySource should be updated - { - final IndexMetadata remoteWithoutRemoteRecoveryMetadata = createIndexMetadata( - "test-remote-without-recovery", - Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) - .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) - .build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder( - remoteWithoutRemoteRecoveryMetadata.getIndex() - ).initializeAsNew(remoteWithoutRemoteRecoveryMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(remoteWithoutRemoteRecoveryMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(remoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newRemoteWithoutRemoteRecoveryIndexRoutingTable = newState.routingTable() - 
.index(remoteWithoutRemoteRecoveryMetadata.getIndex()); - assertEquals( - 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) ) ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource ) ); assertEquals( numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource ) ); + } } From e12ab0f9ed5cfa32fbbcf654253180a72e50d0d7 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Fri, 20 Oct 2023 18:18:36 +0530 Subject: [PATCH 02/13] Fix flaky remote cluster state UT (#10780) Signed-off-by: Dhwanil Patel --- 
.../gateway/remote/RemoteClusterStateServiceTests.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 433eac63e9580..bcc58789dd6fd 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -458,7 +458,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { mockBlobStoreObjects(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) - .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata).version(randomNonNegativeLong())) .build(); final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() .codecVersion(2) @@ -479,6 +479,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { // new cluster state where only global metadata is different Metadata newMetadata = Metadata.builder(clusterState.metadata()) .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .version(randomNonNegativeLong()) .build(); ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); @@ -1281,7 +1282,7 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder().version(randomNonNegativeLong()).put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() ); } From 
5093cc71476f5796f3b91fb18ff867cfe37588c0 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Fri, 20 Oct 2023 19:18:25 +0530 Subject: [PATCH 03/13] Fix spotless failure (#10782) Signed-off-by: Dhwanil Patel --- .../gateway/remote/RemoteClusterStateServiceTests.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index bcc58789dd6fd..5202f31c514ed 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -1282,7 +1282,12 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().version(randomNonNegativeLong()).put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder() + .version(randomNonNegativeLong()) + .put(indexMetadata, true) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() ); } From e691df09c66dcc1693897543fd7633c4b208ce48 Mon Sep 17 00:00:00 2001 From: rayshrey <121871912+rayshrey@users.noreply.github.com> Date: Fri, 20 Oct 2023 20:36:20 +0530 Subject: [PATCH 04/13] Add tracing instrumentation for indexing paths (#10273) * Add tracing instrumentation for indexing paths Signed-off-by: Shreyansh Ray * Fix failing tests and review changes Signed-off-by: Shreyansh Ray * Fix test failures due to Span not being properly closed Signed-off-by: Shreyansh Ray * Changes to spans in primary and replica actions Signed-off-by: Shreyansh Ray * Review comments fixes and refactoring Signed-off-by: Shreyansh Ray * Precommit auto-changes Signed-off-by: Shreyansh Ray * Add refresh policy as attribute Signed-off-by: Shreyansh Ray * Fix changelog entry 
Signed-off-by: Shreyansh Ray * Instrument primary/replica write in TransportWriteAction instead of TransportShardBulkAction Signed-off-by: Shreyansh Ray * Modify SpanBuilder Signed-off-by: Shreyansh Ray * spotlessApply and precommit Signed-off-by: Shreyansh Ray * Change span names Signed-off-by: Shreyansh Ray * Pass Noop Tracer instead of injected tracer Signed-off-by: Shreyansh Ray * Reverting previous changes Signed-off-by: Shreyansh Ray * Remove tracer variable from TransportShardBulkAction Signed-off-by: Shreyansh Ray --------- Signed-off-by: Shreyansh Ray --- CHANGELOG.md | 1 + .../action/bulk/TransportBulkAction.java | 110 +++++++++++------- .../action/bulk/TransportShardBulkAction.java | 7 +- .../TransportResyncReplicationAction.java | 7 +- .../replication/TransportWriteAction.java | 24 +++- .../index/seqno/RetentionLeaseSyncAction.java | 7 +- .../telemetry/tracing/AttributeNames.java | 25 ++++ .../telemetry/tracing/SpanBuilder.java | 20 ++++ ...ActionIndicesThatCannotBeCreatedTests.java | 4 +- .../bulk/TransportBulkActionIngestTests.java | 4 +- .../action/bulk/TransportBulkActionTests.java | 3 +- .../bulk/TransportBulkActionTookTests.java | 3 +- .../bulk/TransportShardBulkActionTests.java | 13 ++- ...TransportResyncReplicationActionTests.java | 6 +- ...rtWriteActionForIndexingPressureTests.java | 3 +- .../TransportWriteActionTests.java | 6 +- .../seqno/RetentionLeaseSyncActionTests.java | 12 +- .../snapshots/SnapshotResiliencyTests.java | 9 +- 18 files changed, 192 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c52c43a35b8d..d7d492679c79d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) - [BUG] Fix java.lang.SecurityException in repository-gcs plugin 
([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) +- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java index 726ba7ba119af..4a9b07c12821d 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java @@ -85,6 +85,11 @@ import org.opensearch.ingest.IngestService; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportService; @@ -133,6 +138,7 @@ public class TransportBulkAction extends HandledTransportAction() { - @Override - public void onResponse(BulkShardResponse bulkShardResponse) { - for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { - // we may have no response if item failed - if (bulkItemResponse.getResponse() != null) { - bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); - } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(bulkItemResponse.getItemId(), bulkItemResponse); - } + final Span span = tracer.startSpan(SpanBuilder.from("bulkShardAction", nodeId, bulkShardRequest)); + try (SpanScope spanScope = 
tracer.withSpanInScope(span)) { + shardBulkAction.execute( + bulkShardRequest, + TraceableActionListener.create(ActionListener.runBefore(new ActionListener() { + @Override + public void onResponse(BulkShardResponse bulkShardResponse) { + for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { + // we may have no response if item failed + if (bulkItemResponse.getResponse() != null) { + bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + docStatusStats.inc(bulkItemResponse.status()); + responses.set(bulkItemResponse.getItemId(), bulkItemResponse); + } - @Override - public void onFailure(Exception e) { - // create failures for all relevant requests - for (BulkItemRequest request : requests) { - final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); - final DocWriteRequest docWriteRequest = request.request(); - final BulkItemResponse bulkItemResponse = new BulkItemResponse( - request.id(), - docWriteRequest.opType(), - new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) - ); + if (counter.decrementAndGet() == 0) { + finishHim(); + } + } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(request.id(), bulkItemResponse); - } + @Override + public void onFailure(Exception e) { + // create failures for all relevant requests + for (BulkItemRequest request : requests) { + final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); + final DocWriteRequest docWriteRequest = request.request(); + final BulkItemResponse bulkItemResponse = new BulkItemResponse( + request.id(), + docWriteRequest.opType(), + new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) + ); + + docStatusStats.inc(bulkItemResponse.status()); + responses.set(request.id(), bulkItemResponse); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + if (counter.decrementAndGet() == 0) { + 
finishHim(); + } + } - private void finishHim() { - indicesService.addDocStatusStats(docStatusStats); - listener.onResponse( - new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(startTimeNanos)) - ); - } - }, releasable::close)); + private void finishHim() { + indicesService.addDocStatusStats(docStatusStats); + listener.onResponse( + new BulkResponse( + responses.toArray(new BulkItemResponse[responses.length()]), + buildTookInMillis(startTimeNanos) + ) + ); + } + }, releasable::close), span, tracer) + ); + } catch (Exception e) { + span.setError(e); + span.endSpan(); + throw e; + } } bulkRequest = null; // allow memory for bulk request items to be reclaimed before all items have been completed } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index fddda0ef1f9a7..268a6ed6f85b8 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -99,6 +99,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportChannel; @@ -161,7 +162,8 @@ public TransportShardBulkAction( IndexingPressureService indexingPressureService, SegmentReplicationPressureService segmentReplicationPressureService, RemoteStorePressureService remoteStorePressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -177,7 +179,8 @@ public TransportShardBulkAction( EXECUTOR_NAME_FUNCTION, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); this.updateHelper = updateHelper; 
this.mappingUpdatedAction = mappingUpdatedAction; diff --git a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java index 032fe83e2220b..9d60706d1f100 100644 --- a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java @@ -54,6 +54,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportException; @@ -93,7 +94,8 @@ public TransportResyncReplicationAction( ShardStateAction shardStateAction, ActionFilters actionFilters, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -109,7 +111,8 @@ public TransportResyncReplicationAction( EXECUTOR_NAME_FUNCTION, true, /* we should never reject resync because of thread pool capacity on primary */ indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java index a0b5299805868..9ebfa8cfd0df8 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java @@ -59,6 +59,11 @@ import org.opensearch.index.translog.Translog.Location; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.Span; +import 
org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -82,6 +87,7 @@ public abstract class TransportWriteAction< protected final SystemIndices systemIndices; private final Function executorFunction; + private final Tracer tracer; protected TransportWriteAction( Settings settings, @@ -97,7 +103,8 @@ protected TransportWriteAction( Function executorFunction, boolean forceExecutionOnPrimary, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { // We pass ThreadPool.Names.SAME to the super class as we control the dispatching to the // ThreadPool.Names.WRITE/ThreadPool.Names.SYSTEM_WRITE thread pools in this class. @@ -119,6 +126,7 @@ protected TransportWriteAction( this.executorFunction = executorFunction; this.indexingPressureService = indexingPressureService; this.systemIndices = systemIndices; + this.tracer = tracer; } protected String executor(IndexShard shard) { @@ -220,7 +228,12 @@ protected void shardOperationOnPrimary( threadPool.executor(executor).execute(new ActionRunnable>(listener) { @Override protected void doRun() { - dispatchedShardOperationOnPrimary(request, primary, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnPrimary", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnPrimary(request, primary, TraceableActionListener.create(listener, span, tracer)); + } } @Override @@ -248,7 +261,12 @@ protected void shardOperationOnReplica(ReplicaRequest request, IndexShard replic threadPool.executor(executorFunction.apply(replica)).execute(new ActionRunnable(listener) { @Override protected void 
doRun() { - dispatchedShardOperationOnReplica(request, replica, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnReplica", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnReplica(request, replica, TraceableActionListener.create(listener, span, tracer)); + } } @Override diff --git a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java index f74fc7eefe65c..ca3c7e1d49700 100644 --- a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java +++ b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java @@ -62,6 +62,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; @@ -99,7 +100,8 @@ public RetentionLeaseSyncAction( final ShardStateAction shardStateAction, final ActionFilters actionFilters, final IndexingPressureService indexingPressureService, - final SystemIndices systemIndices + final SystemIndices systemIndices, + final Tracer tracer ) { super( settings, @@ -115,7 +117,8 @@ public RetentionLeaseSyncAction( ignore -> ThreadPool.Names.MANAGEMENT, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java index a9514c298ef88..b6b2cf360d1c5 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java @@ -69,4 +69,29 @@ private 
AttributeNames() { * Action Name. */ public static final String TRANSPORT_ACTION = "action"; + + /** + * Index Name + */ + public static final String INDEX = "index"; + + /** + * Shard ID + */ + public static final String SHARD_ID = "shard_id"; + + /** + * Number of request items in bulk request + */ + public static final String BULK_REQUEST_ITEMS = "bulk_request_items"; + + /** + * Node ID + */ + public static final String NODE_ID = "node_id"; + + /** + * Refresh Policy + */ + public static final String REFRESH_POLICY = "refresh_policy"; } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java index d97fbd371ab2a..1dce422943b7a 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java @@ -8,6 +8,8 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.action.bulk.BulkShardRequest; +import org.opensearch.action.support.replication.ReplicatedWriteRequest; import org.opensearch.common.annotation.InternalApi; import org.opensearch.core.common.Strings; import org.opensearch.http.HttpRequest; @@ -68,6 +70,10 @@ public static SpanCreationContext from(String action, Transport.Connection conne return SpanCreationContext.server().name(createSpanName(action, connection)).attributes(buildSpanAttributes(action, connection)); } + public static SpanCreationContext from(String spanName, String nodeId, ReplicatedWriteRequest request) { + return SpanCreationContext.server().name(spanName).attributes(buildSpanAttributes(nodeId, request)); + } + private static String createSpanName(HttpRequest httpRequest) { return httpRequest.method().name() + SEPARATOR + httpRequest.uri(); } @@ -150,4 +156,18 @@ private static Attributes buildSpanAttributes(String action, TcpChannel tcpChann return attributes; } + private static Attributes buildSpanAttributes(String nodeId, 
ReplicatedWriteRequest request) { + Attributes attributes = Attributes.create() + .addAttribute(AttributeNames.NODE_ID, nodeId) + .addAttribute(AttributeNames.REFRESH_POLICY, request.getRefreshPolicy().getValue()); + if (request.shardId() != null) { + attributes.addAttribute(AttributeNames.INDEX, request.shardId().getIndexName()) + .addAttribute(AttributeNames.SHARD_ID, request.shardId().getId()); + } + if (request instanceof BulkShardRequest) { + attributes.addAttribute(AttributeNames.BULK_REQUEST_ITEMS, ((BulkShardRequest) request).items().length); + } + return attributes; + } + } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java index 0f67eff26cbde..cf7080ab2fc06 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java @@ -54,6 +54,7 @@ import org.opensearch.index.VersionType; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -155,7 +156,8 @@ private void indicesThatCannotBeCreatedTestCase( new ClusterService(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) { @Override void executeBulk( diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index 515f6eae28a34..141c630b94020 100644 --- 
a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -70,6 +70,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.ingest.IngestService; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -172,7 +173,8 @@ class TestTransportBulkAction extends TransportBulkAction { new ClusterService(SETTINGS, new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java index 10cad6fb147a2..6bbd740df7f9c 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java @@ -118,7 +118,8 @@ class TestTransportBulkAction extends TransportBulkAction { new AutoCreateIndex(Settings.EMPTY, clusterService.getClusterSettings(), new Resolver(), new SystemIndices(emptyMap())), new IndexingPressureService(Settings.EMPTY, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java index 852e3837e1e7a..9d5b4430ea395 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java @@ -282,7 +282,8 @@ static class 
TestTransportBulkAction extends TransportBulkAction { new IndexingPressureService(Settings.EMPTY, clusterService), null, new SystemIndices(emptyMap()), - relativeTimeProvider + relativeTimeProvider, + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index fe0fdd07025d9..b325cfa197933 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -88,6 +88,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; @@ -1074,7 +1075,8 @@ public void testHandlePrimaryTermValidationRequestWithDifferentAllocationId() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId + "-1", 1, shardId), @@ -1105,7 +1107,8 @@ public void testHandlePrimaryTermValidationRequestWithOlderPrimaryTerm() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1136,7 +1139,8 @@ public void testHandlePrimaryTermValidationRequestSuccess() { mock(IndexingPressureService.class), 
mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1178,7 +1182,8 @@ private TransportShardBulkAction createAction() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java index 3bd8930064563..da87a0a967f53 100644 --- a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java +++ b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java @@ -203,7 +203,8 @@ public void testResyncDoesNotBlockOnPrimaryAction() throws Exception { shardStateAction, new ActionFilters(new HashSet<>()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertThat(action.globalBlockLevel(), nullValue()); @@ -256,7 +257,8 @@ private TransportResyncReplicationAction createAction() { mock(ShardStateAction.class), new ActionFilters(new HashSet<>()), mock(IndexingPressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java index 4a2185d1558f7..7212b1f5efe13 100644 --- 
a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java @@ -392,7 +392,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, TransportWriteActionForIndexingPressureTests.this.indexingPressureService, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java index 9d2069ac16190..b4549f82230bf 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java @@ -477,7 +477,8 @@ protected TestAction(boolean withDocumentFailureOnPrimary, boolean withDocumentF ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(Settings.EMPTY, TransportWriteActionTests.this.clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = withDocumentFailureOnPrimary; this.withDocumentFailureOnReplica = withDocumentFailureOnReplica; @@ -505,7 +506,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = false; this.withDocumentFailureOnReplica = false; diff --git a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java index d9bca55a208c2..63a9ac2f2e8ec 100644 --- 
a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java @@ -125,7 +125,8 @@ public void testRetentionLeaseSyncActionOnPrimary() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -162,7 +163,8 @@ public void testRetentionLeaseSyncActionOnReplica() throws Exception { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -203,7 +205,8 @@ public void testBlocks() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertNull(action.indexBlockLevel()); @@ -233,7 +236,8 @@ private RetentionLeaseSyncAction createAction() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 2f9f38d18a064..710717532ceb4 100644 --- 
a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2124,7 +2124,8 @@ public void onFailure(final Exception e) { shardStateAction, actionFilters, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ), new GlobalCheckpointSyncAction( @@ -2190,7 +2191,8 @@ public void onFailure(final Exception e) { mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); actions.put( BulkAction.INSTANCE, @@ -2214,7 +2216,8 @@ public void onFailure(final Exception e) { new AutoCreateIndex(settings, clusterSettings, indexNameExpressionResolver, new SystemIndices(emptyMap())), new IndexingPressureService(settings, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ); final RestoreService restoreService = new RestoreService( From a1fde65fe2ad1acda4364cf5fb751e9b494327ce Mon Sep 17 00:00:00 2001 From: Ashish Date: Fri, 20 Oct 2023 21:06:42 +0530 Subject: [PATCH 05/13] [Remote Store] Fix relocation failure due to transport receive timeout (#10761) * [Remote Store] Fix relocation failure due to transport receive timeout Signed-off-by: Ashish Singh * Fix existing extended shardIdle for remote backed shards Signed-off-by: Ashish Singh * Incorporate PR review comments Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- .../opensearch/remotestore/RemoteStoreIT.java | 23 +++++++++++++++++++ .../org/opensearch/index/IndexSettings.java | 3 +++ .../opensearch/index/shard/IndexShard.java | 6 ++++- .../translog/InternalTranslogManager.java | 10 ++++---- .../index/translog/RemoteFsTranslog.java | 5 ++++ .../opensearch/index/translog/Translog.java | 4 ++++ .../recovery/PeerRecoverySourceService.java | 3 ++- 
.../recovery/RemoteRecoveryTargetHandler.java | 13 +++++++++-- .../index/shard/RemoteIndexShardTests.java | 9 ++++++++ .../SegmentReplicationIndexShardTests.java | 12 ++++++---- 10 files changed, 75 insertions(+), 13 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 1fb5c2052aded..b3b4f8e10fd31 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -509,4 +509,27 @@ public void testRestoreSnapshotToIndexWithSameNameDifferentUUID() throws Excepti assertHitCount(client(dataNodes.get(1)).prepareSearch(INDEX_NAME).setSize(0).get(), 50); }); } + + public void testNoSearchIdleForAnyReplicaCount() throws ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + String primaryShardNode = internalCluster().startDataOnlyNodes(1).get(0); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureGreen(INDEX_NAME); + IndexShard indexShard = getIndexShard(primaryShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + + String replicaShardNode = internalCluster().startDataOnlyNodes(1).get(0); + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)) + ); + ensureGreen(INDEX_NAME); + assertFalse(indexShard.isSearchIdleSupported()); + + indexShard = getIndexShard(replicaShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + } } diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e90e9259f6a5c..99d2b5a74c406 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java 
@@ -1024,6 +1024,9 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } private void setSearchIdleAfter(TimeValue searchIdleAfter) { + if (this.isRemoteStoreEnabled) { + logger.warn("Search idle is not supported for remote backed indices"); + } if (this.replicationType == ReplicationType.SEGMENT && this.getNumberOfReplicas() > 0) { logger.warn("Search idle is not supported for indices with replicas using 'replication.type: SEGMENT'"); } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 5ebfd3863a6cf..1b7d1b2716979 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4425,7 +4425,6 @@ public final boolean isSearchIdle() { } /** - * * Returns true if this shard supports search idle. *
<p>
* Indices using Segment Replication will ignore search idle unless there are no replicas. @@ -4434,6 +4433,11 @@ public final boolean isSearchIdle() { * a new set of segments. */ public final boolean isSearchIdleSupported() { + // If the index is remote store backed, then search idle is not supported. This is to ensure that async refresh + // task continues to upload to remote store periodically. + if (isRemoteTranslogEnabled()) { + return false; + } return indexSettings.isSegRepEnabled() == false || indexSettings.getNumberOfReplicas() == 0; } diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index 85c52b907d326..4d0fc13d433c6 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -430,10 +430,10 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { - final long translogGenerationOfLastCommit = translog.getMinGenerationForSeqNo( - localCheckpointOfLastCommit + 1 - ).translogFileGeneration; - if (translog.sizeInBytesByMinGen(translogGenerationOfLastCommit) < flushThreshold) { + // This is the minimum seqNo that is referred in translog and considered for calculating translog size + long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); + final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; + if (translog.sizeInBytesByMinGen(minReferencedTranslogGeneration) < flushThreshold) { return false; } /* @@ -454,7 +454,7 @@ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long fl final long translogGenerationOfNewCommit = translog.getMinGenerationForSeqNo( 
localCheckpointTrackerSupplier.get().getProcessedCheckpoint() + 1 ).translogFileGeneration; - return translogGenerationOfLastCommit < translogGenerationOfNewCommit + return minReferencedTranslogGeneration < translogGenerationOfNewCommit || localCheckpointTrackerSupplier.get().getProcessedCheckpoint() == localCheckpointTrackerSupplier.get().getMaxSeqNo(); } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 29c825fd383c5..2dd9b1a545d4a 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -544,4 +544,9 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) thro } } } + + @Override + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minSeqNoToKeep; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index cf7f18840a03e..b44aa6e059224 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2034,4 +2034,8 @@ public static String createEmptyTranslog( writer.close(); return uuid; } + + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minUnrefCheckpointInLastCommit; + } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java index 6c7632a8a408d..cb2bedf00de99 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java @@ -376,7 +376,8 @@ private Tuple createRecovery transportService, 
request.targetNode(), recoverySettings, - throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime) + throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime), + shard.isRemoteTranslogEnabled() ); handler = RecoverySourceHandlerFactory.create(shard, recoveryTarget, request, recoverySettings); return Tuple.tuple(handler, recoveryTarget); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java index 66f5b13449f05..37227596fdfe7 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java @@ -75,6 +75,7 @@ public class RemoteRecoveryTargetHandler implements RecoveryTargetHandler { private final AtomicLong requestSeqNoGenerator = new AtomicLong(0); private final RetryableTransportClient retryableTransportClient; private final RemoteSegmentFileChunkWriter fileChunkWriter; + private final boolean remoteStoreEnabled; public RemoteRecoveryTargetHandler( long recoveryId, @@ -82,7 +83,8 @@ public RemoteRecoveryTargetHandler( TransportService transportService, DiscoveryNode targetNode, RecoverySettings recoverySettings, - Consumer onSourceThrottle + Consumer onSourceThrottle, + boolean remoteStoreEnabled ) { this.transportService = transportService; // It is safe to pass the retry timeout value here because RemoteRecoveryTargetHandler @@ -111,6 +113,7 @@ public RemoteRecoveryTargetHandler( requestSeqNoGenerator, onSourceThrottle ); + this.remoteStoreEnabled = remoteStoreEnabled; } public DiscoveryNode targetNode() { @@ -129,7 +132,13 @@ public void prepareForTranslogOperations(int totalTranslogOps, ActionListener reader = in -> TransportResponse.Empty.INSTANCE; final ActionListener responseListener = ActionListener.map(listener, r -> null); - retryableTransportClient.executeRetryableAction(action, 
request, responseListener, reader); + if (remoteStoreEnabled) { + // If remote store is enabled, during the prepare_translog phase, translog is also downloaded on the + // target host along with incremental segments download. + retryableTransportClient.executeRetryableAction(action, request, translogOpsRequestOptions, responseListener, reader); + } else { + retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + } } @Override diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index fe389e3b3fcb4..703a7d457d5b6 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -471,6 +471,15 @@ public void onReplicationFailure( } } + @Override + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. 
+ assertFalse(primary.isSearchIdleSupported()); + assertTrue(primary.isSearchIdle()); + assertTrue(primary.scheduledRefresh()); + assertFalse(primary.hasRefreshPending()); + } + private void assertSingleSegmentFile(IndexShard shard, String fileName) throws IOException { final Set segmentsFileNames = Arrays.stream(shard.store().directory().listAll()) .filter(file -> file.startsWith(IndexFileNames.SEGMENTS)) diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index eab38bfe5c64d..7caff3e5f5479 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -436,13 +436,17 @@ public void testShardIdleWithNoReplicas() throws Exception { shards.startAll(); final IndexShard primary = shards.getPrimary(); shards.indexDocs(randomIntBetween(1, 10)); - // ensure search idle conditions are met. - assertTrue(primary.isSearchIdle()); - assertFalse(primary.scheduledRefresh()); - assertTrue(primary.hasRefreshPending()); + validateShardIdleWithNoReplicas(primary); } } + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertTrue(primary.isSearchIdle()); + assertFalse(primary.scheduledRefresh()); + assertTrue(primary.hasRefreshPending()); + } + /** * here we are starting a new primary shard in PrimaryMode and testing if the shard publishes checkpoint after refresh. 
*/ From ffe9371b38f98a2c9314bb6cfed4f8f224ff085c Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 20 Oct 2023 12:40:04 -0400 Subject: [PATCH 06/13] Update Github pull request template to have a task for inspecting failing checks (#10751) * Update Github pull request template to have a task for inspecting failing checks Signed-off-by: Andriy Redko * Add failing builds troubleshooting section to CONTRIBUTING.md Signed-off-by: Andriy Redko * Address review comments Signed-off-by: Andriy Redko --------- Signed-off-by: Andriy Redko --- .github/pull_request_template.md | 1 + CONTRIBUTING.md | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c47b9e0b69256..908a032bf833e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,6 +17,7 @@ Resolves #[Issue number to be closed when this PR is merged] - [ ] All tests pass - [ ] New functionality has been documented. - [ ] New functionality has javadoc added +- [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds)) - [ ] Commits are signed per the DCO using --signoff - [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d379d78829318..4a1162cf2558b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,6 +8,7 @@ - [Developer Certificate of Origin](#developer-certificate-of-origin) - [Changelog](#changelog) - [Review Process](#review-process) + - [Troubleshooting Failing Builds](#troubleshooting-failing-builds) # Contributing to OpenSearch @@ -162,3 +163,14 @@ During the PR process, expect that there will be some back-and-forth. 
Please try If we accept the PR, a [maintainer](MAINTAINERS.md) will merge your change and usually take care of backporting it to appropriate branches ourselves. If we reject the PR, we will close the pull request with a comment explaining why. This decision isn't always final: if you feel we have misunderstood your intended change or otherwise think that we should reconsider then please continue the conversation with a comment on the PR and we'll do our best to address any further points you raise. + +## Troubleshooting Failing Builds + +The OpenSearch testing framework offers many capabilities but exhibits significant complexity (it does a lot of randomization internally to cover as many edge cases and variations as possible). Unfortunately, this poses a challenge by making it harder to discover important issues/bugs in a straightforward way and may lead to so-called flaky tests - the tests which flip randomly from success to failure without any code changes. + +If your pull request reports a failing test(s) on one of the checks, please: + - look if there is an existing [issue](https://github.com/opensearch-project/OpenSearch/issues) reported for the test in question + - if not, please make sure this is not caused by your changes, run the failing test(s) locally for some time + - if you are sure the failure is not related, please open a new [bug](https://github.com/opensearch-project/OpenSearch/issues/new?assignees=&labels=bug%2C+untriaged&projects=&template=bug_template.md&title=%5BBUG%5D) with the `flaky-test` label + - add a comment referencing the issue(s) or bug report(s) to your pull request explaining the failing build(s) + - as a bonus point, try to contribute by fixing the flaky test(s) From 1e28738b8c966011bf1ae1f00431f0377761cb0a Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Fri, 20 Oct 2023 12:17:51 -0500 Subject: [PATCH 07/13] Increase remote recovery thread pool size (#10750) The remote recovery thread pool does blocking I/O when downloading files, so
the "half processor count max 10" was definitely too small. This can be shown by triggering recoveries on a node that is also doing segment replication, and the replication lag will increase due to contention on that thread pool. Some amount of contention is inevitable, but the change here to increase the download thread pool, and also limit the concurrent usage of that thread pool by any single recovery/replication to 25% of the threads does help. Long term, we can improve this even further by moving to fully async I/O to avoid blocking threads in the application on draining InputStreams. Signed-off-by: Andrew Ross --- .../org/opensearch/indices/recovery/RecoverySettings.java | 7 ++++--- .../main/java/org/opensearch/threadpool/ThreadPool.java | 7 ++++++- .../org/opensearch/threadpool/ScalingThreadPoolTests.java | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java index 44dfb2f4cb00a..0f3025369833d 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java @@ -41,6 +41,7 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; @@ -87,10 +88,10 @@ public class RecoverySettings { /** * Controls the maximum number of streams that can be started concurrently per recovery when downloading from the remote store. 
*/ - public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = Setting.intSetting( + public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = new Setting<>( "indices.recovery.max_concurrent_remote_store_streams", - 10, - 1, + (s) -> Integer.toString(Math.max(1, OpenSearchExecutors.allocatedProcessors(s) / 2)), + (s) -> Setting.parseInt(s, 1, "indices.recovery.max_concurrent_remote_store_streams"), Property.Dynamic, Property.NodeScope ); diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index fab7620292dd2..5f10986239300 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -273,7 +273,12 @@ public ThreadPool( ); builders.put( Names.REMOTE_RECOVERY, - new ScalingExecutorBuilder(Names.REMOTE_RECOVERY, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)) + new ScalingExecutorBuilder( + Names.REMOTE_RECOVERY, + 1, + twiceAllocatedProcessors(allocatedProcessors), + TimeValue.timeValueMinutes(5) + ) ); if (FeatureFlags.isEnabled(FeatureFlags.CONCURRENT_SEGMENT_SEARCH)) { builders.put( diff --git a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java index ba2d4b8c247bb..19271bbf30e80 100644 --- a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java @@ -154,7 +154,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcesso sizes.put(ThreadPool.Names.TRANSLOG_SYNC, n -> 4 * n); sizes.put(ThreadPool.Names.REMOTE_PURGE, ThreadPool::halfAllocatedProcessorsMaxFive); sizes.put(ThreadPool.Names.REMOTE_REFRESH_RETRY, ThreadPool::halfAllocatedProcessorsMaxTen); - sizes.put(ThreadPool.Names.REMOTE_RECOVERY, 
ThreadPool::halfAllocatedProcessorsMaxTen); + sizes.put(ThreadPool.Names.REMOTE_RECOVERY, ThreadPool::twiceAllocatedProcessors); return sizes.get(threadPoolName).apply(numberOfProcessors); } From 4f8bcff554fe3b019d4149ddcdcc634120c67dd3 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Fri, 20 Oct 2023 15:57:17 -0700 Subject: [PATCH 08/13] Return unformatted Segment Replication metrics that take upload time into account for replication lag (#10723) * Return unformatted segrep metrics in stats Signed-off-by: Poojita Raj * Take upload time into account for replication time lag Signed-off-by: Poojita Raj * unformat segrep stats Signed-off-by: Poojita Raj * remove unused field names Signed-off-by: Poojita Raj --------- Signed-off-by: Poojita Raj --- .../main/java/org/opensearch/index/ReplicationStats.java | 8 +++----- .../main/java/org/opensearch/index/shard/IndexShard.java | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/ReplicationStats.java b/server/src/main/java/org/opensearch/index/ReplicationStats.java index 9cc6685c75f80..0ae4526365bf1 100644 --- a/server/src/main/java/org/opensearch/index/ReplicationStats.java +++ b/server/src/main/java/org/opensearch/index/ReplicationStats.java @@ -8,11 +8,9 @@ package org.opensearch.index; -import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; @@ -76,9 +74,9 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.SEGMENT_REPLICATION); - builder.field(Fields.MAX_BYTES_BEHIND, new 
ByteSizeValue(maxBytesBehind).toString()); - builder.field(Fields.TOTAL_BYTES_BEHIND, new ByteSizeValue(totalBytesBehind).toString()); - builder.field(Fields.MAX_REPLICATION_LAG, new TimeValue(maxReplicationLag)); + builder.field(Fields.MAX_BYTES_BEHIND, maxBytesBehind); + builder.field(Fields.TOTAL_BYTES_BEHIND, totalBytesBehind); + builder.field(Fields.MAX_REPLICATION_LAG, maxReplicationLag); builder.endObject(); return builder; } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 1b7d1b2716979..f990a3b56e856 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -3010,7 +3010,7 @@ public ReplicationStats getReplicationStats() { long maxBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).max().orElse(0L); long totalBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).sum(); long maxReplicationLag = stats.stream() - .mapToLong(SegmentReplicationShardStats::getCurrentReplicationTimeMillis) + .mapToLong(SegmentReplicationShardStats::getCurrentReplicationLagMillis) .max() .orElse(0L); return new ReplicationStats(maxBytesBehind, totalBytesBehind, maxReplicationLag); From 1e9ec52dd9d5e7d4a3ffb7d37c8b7fdf4069c26d Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 20 Oct 2023 16:10:31 -0700 Subject: [PATCH 09/13] [Segment Replication] Fix Flaky test SegmentReplicationRelocationIT.testPrimaryRelocation (#10701) * Add primary mode check before assserting on primary mode. Signed-off-by: Rishikesh1159 * remove unnecessary shardRouting check. Signed-off-by: Rishikesh1159 * Add test logging. Signed-off-by: Rishikesh1159 * Addressing comments on PR. 
Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 --- .../replication/SegmentReplicationRelocationIT.java | 2 ++ .../replication/SegmentReplicationSourceHandler.java | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java index dd832a63d1e66..dbe0b43441f54 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java @@ -26,6 +26,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.MockTransportService; import org.opensearch.transport.TransportService; @@ -55,6 +56,7 @@ private void createIndex(int replicaCount) { * This test verifies happy path when primary shard is relocated newly added node (target) in the cluster. 
Before * relocation and after relocation documents are indexed and documents are verified */ + @TestLogging(reason = "Getting trace logs from replication,shard and allocation package", value = "org.opensearch.indices.replication:TRACE, org.opensearch.index.shard:TRACE, org.opensearch.cluster.routing.allocation:TRACE") public void testPrimaryRelocation() throws Exception { final String oldPrimary = internalCluster().startNode(); createIndex(1); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java index e2c47b0fb3159..674c09311c645 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java @@ -12,8 +12,6 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.StepListener; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.routing.IndexShardRoutingTable; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.logging.Loggers; import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.ListenableFuture; @@ -22,7 +20,6 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.recovery.DelayRecoveryException; import org.opensearch.indices.recovery.FileChunkWriter; import org.opensearch.indices.recovery.MultiChunkTransfer; import org.opensearch.indices.replication.common.CopyState; @@ -146,12 +143,6 @@ public synchronized void sendFiles(GetSegmentFilesRequest request, ActionListene ); }; cancellableThreads.checkForCancel(); - final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); - ShardRouting targetShardRouting 
= routingTable.getByAllocationId(request.getTargetAllocationId()); - if (targetShardRouting == null) { - logger.debug("delaying replication of {} as it is not listed as assigned to target node {}", shard.shardId(), targetNode); - throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); - } final StepListener sendFileStep = new StepListener<>(); Set storeFiles = new HashSet<>(Arrays.asList(shard.store().directory().listAll())); From 51626d03f857ca840280d1e57cfb1bdfbba75e2d Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:13:43 -0700 Subject: [PATCH 10/13] [Segment Replication] Add Segment Replication backpressure rejection stats to _nodes/stats (#10656) * Initial WIP for adding segrep backpressure to node stats. Signed-off-by: Rishikesh1159 * Bind SegmentReplicarionStatsTracker in Node.java Signed-off-by: Rishikesh1159 * remove additional segrep backpressure info from node stats Signed-off-by: Rishikesh1159 * fix metric name in node stats Signed-off-by: Rishikesh1159 * Fix compile error. Signed-off-by: Rishikesh1159 * Fix compile errors. Signed-off-by: Rishikesh1159 * Address comments on PR. Signed-off-by: Rishikesh1159 * Update java docs. Signed-off-by: Rishikesh1159 * Address comments on PR and fix compile errors. Signed-off-by: Rishikesh1159 * Address comments on PR. Signed-off-by: Rishikesh1159 * Update unit test. 
Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 Signed-off-by: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> --- .../admin/cluster/node/stats/NodeStats.java | 24 +++++++ .../cluster/node/stats/NodesStatsRequest.java | 1 + .../node/stats/TransportNodesStatsAction.java | 1 + .../stats/TransportClusterStatsAction.java | 1 + .../SegmentReplicationPressureService.java | 3 +- .../SegmentReplicationRejectionStats.java | 67 +++++++++++++++++++ .../index/SegmentReplicationStatsTracker.java | 8 +++ .../main/java/org/opensearch/node/Node.java | 4 ++ .../java/org/opensearch/node/NodeService.java | 7 ++ .../cluster/node/stats/NodeStatsTests.java | 18 +++++ .../opensearch/cluster/DiskUsageTests.java | 6 ++ ...egmentReplicationPressureServiceTests.java | 9 ++- .../SegmentReplicationStatsTrackerTests.java | 35 ++++++++++ .../snapshots/SnapshotResiliencyTests.java | 2 + .../MockInternalClusterInfoService.java | 1 + .../opensearch/test/InternalTestCluster.java | 1 + 16 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java create mode 100644 server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index e9bfa358103c8..3d37056956c69 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -46,6 +46,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.stats.IndexingPressureStats; import org.opensearch.index.stats.ShardIndexingPressureStats; 
import org.opensearch.index.store.remote.filecache.FileCacheStats; @@ -129,6 +130,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private SearchBackpressureStats searchBackpressureStats; + @Nullable + private SegmentReplicationRejectionStats segmentReplicationRejectionStats; + @Nullable private ClusterManagerThrottlingStats clusterManagerThrottlingStats; @@ -211,6 +215,12 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + segmentReplicationRejectionStats = in.readOptionalWriteable(SegmentReplicationRejectionStats::new); + } else { + segmentReplicationRejectionStats = null; + } if (in.getVersion().onOrAfter(Version.V_2_12_0)) { repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); } else { @@ -244,6 +254,7 @@ public NodeStats( @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, @Nullable SearchPipelineStats searchPipelineStats, + @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, @Nullable RepositoriesStats repositoriesStats ) { super(node); @@ -271,6 +282,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; } @@ -415,6 +427,10 @@ public SearchPipelineStats getSearchPipelineStats() { } @Nullable + public SegmentReplicationRejectionStats getSegmentReplicationRejectionStats() { + return segmentReplicationRejectionStats; + } + public RepositoriesStats getRepositoriesStats() { return repositoriesStats; } @@ -465,6 +481,10 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { 
out.writeOptionalWriteable(resourceUsageStats); } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(segmentReplicationRejectionStats); + } if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(repositoriesStats); } @@ -561,6 +581,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getSegmentReplicationRejectionStats() != null) { + getSegmentReplicationRejectionStats().toXContent(builder, params); + } + if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 88dff20354aa2..fc72668d36413 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -215,6 +215,7 @@ public enum Metric { TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), RESOURCE_USAGE_STATS("resource_usage_stats"), + SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index aa02f8e580f4a..99cf42cfdc4d0 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -126,6 +126,7 @@ protected NodeStats 
nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index f51fabbfb2388..5efec8b876435 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -170,6 +170,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java index 4284daf9ffef4..d9d480e7b2b27 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java @@ -106,10 +106,11 @@ public SegmentReplicationPressureService( ClusterService clusterService, IndicesService indicesService, ShardStateAction shardStateAction, + SegmentReplicationStatsTracker tracker, ThreadPool threadPool ) { this.indicesService = indicesService; - this.tracker = new SegmentReplicationStatsTracker(this.indicesService); + this.tracker = tracker; this.shardStateAction = shardStateAction; this.threadPool = threadPool; diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java 
b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java new file mode 100644 index 0000000000000..9f9f150ebe2d7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.Version; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Segment replication rejection stats. + * + * @opensearch.internal + */ +public class SegmentReplicationRejectionStats implements Writeable, ToXContentFragment { + + /** + * Total rejections due to segment replication backpressure + */ + private long totalRejectionCount; + + public SegmentReplicationRejectionStats(final long totalRejectionCount) { + this.totalRejectionCount = totalRejectionCount; + } + + public SegmentReplicationRejectionStats(StreamInput in) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.totalRejectionCount = in.readVLong(); + } + } + + public long getTotalRejectionCount() { + return totalRejectionCount; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("segment_replication_backpressure"); + builder.field("total_rejected_requests", totalRejectionCount); + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if 
(out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeVLong(totalRejectionCount); + } + } + + @Override + public String toString() { + return "SegmentReplicationRejectionStats{ totalRejectedRequestCount=" + totalRejectionCount + '}'; + } + +} diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java index 6d5c00c08caff..f5fc8aa1c1eea 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java @@ -33,6 +33,14 @@ public SegmentReplicationStatsTracker(IndicesService indicesService) { rejectionCount = ConcurrentCollections.newConcurrentMap(); } + public SegmentReplicationRejectionStats getTotalRejectionStats() { + return new SegmentReplicationRejectionStats(this.rejectionCount.values().stream().mapToInt(AtomicInteger::get).sum()); + } + + protected Map getRejectionCount() { + return rejectionCount; + } + public SegmentReplicationStats getStats() { Map stats = new HashMap<>(); for (IndexService indexService : indicesService) { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index c9148f382a028..711a90d424ac3 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; @@ -977,6 +978,7 @@ protected Node( transportService.getTaskManager() ); + final SegmentReplicationStatsTracker segmentReplicationStatsTracker = new 
SegmentReplicationStatsTracker(indicesService); RepositoriesModule repositoriesModule = new RepositoriesModule( this.environment, pluginsService.filterPlugins(RepositoryPlugin.class), @@ -1116,6 +1118,7 @@ protected Node( fileCache, taskCancellationMonitoringService, resourceUsageCollectorService, + segmentReplicationStatsTracker, repositoryService ); @@ -1246,6 +1249,7 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); + b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); }); injector = modules.createInjector(); diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index e2d7bc2c86ba3..49dde0b81cac7 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -48,6 +48,7 @@ import org.opensearch.discovery.Discovery; import org.opensearch.http.HttpServerTransport; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.indices.IndicesService; import org.opensearch.ingest.IngestService; @@ -96,6 +97,8 @@ public class NodeService implements Closeable { private final TaskCancellationMonitoringService taskCancellationMonitoringService; private final RepositoriesService repositoriesService; + private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; + NodeService( Settings settings, ThreadPool threadPool, @@ -119,6 +122,7 @@ public class NodeService implements Closeable { FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, ResourceUsageCollectorService resourceUsageCollectorService, + 
SegmentReplicationStatsTracker segmentReplicationStatsTracker, RepositoriesService repositoriesService ) { this.settings = settings; @@ -146,6 +150,7 @@ public class NodeService implements Closeable { this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); + this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; } public NodeInfo info( @@ -226,6 +231,7 @@ public NodeStats stats( boolean taskCancellation, boolean searchPipelineStats, boolean resourceUsageStats, + boolean segmentReplicationTrackerStats, boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will @@ -256,6 +262,7 @@ public NodeStats stats( fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? this.taskCancellationMonitoringService.stats() : null, searchPipelineStats ? this.searchPipelineService.stats() : null, + segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, repositoriesStats ? 
this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 3491f18da9550..ebdd012006fb2 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -49,6 +49,7 @@ import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.remote.RemoteSegmentStats; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.RemoteTranslogStats; @@ -417,6 +418,17 @@ public void testSerialization() throws IOException { assertEquals(aResourceUsageStats.getTimestamp(), bResourceUsageStats.getTimestamp()); }); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = nodeStats.getSegmentReplicationRejectionStats(); + SegmentReplicationRejectionStats deserializedSegmentReplicationRejectionStats = deserializedNodeStats + .getSegmentReplicationRejectionStats(); + if (segmentReplicationRejectionStats == null) { + assertNull(deserializedSegmentReplicationRejectionStats); + } else { + assertEquals( + segmentReplicationRejectionStats.getTotalRejectionCount(), + deserializedSegmentReplicationRejectionStats.getTotalRejectionCount() + ); + } ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); if (scriptCacheStats == null) { @@ -812,6 +824,11 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { } nodesResourceUsageStats = new NodesResourceUsageStats(resourceUsageStatsMap); } + SegmentReplicationRejectionStats 
segmentReplicationRejectionStats = null; + if (frequently()) { + segmentReplicationRejectionStats = new SegmentReplicationRejectionStats(randomNonNegativeLong()); + } + ClusterManagerThrottlingStats clusterManagerThrottlingStats = null; if (frequently()) { clusterManagerThrottlingStats = new ClusterManagerThrottlingStats(); @@ -853,6 +870,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { null, null, null, + segmentReplicationRejectionStats, null ); } diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 6f03e87bf5824..f037b75dc16a3 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -192,6 +192,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -220,6 +221,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -248,6 +250,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -307,6 +310,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -335,6 +339,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -363,6 +368,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java index 34fa13f0ba62c..478fdcb24f76a 100644 --- a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java @@ -278,6 +278,13 @@ private SegmentReplicationPressureService buildPressureService(Settings settings ClusterService clusterService = 
mock(ClusterService.class); when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); - return new SegmentReplicationPressureService(settings, clusterService, indicesService, shardStateAction, mock(ThreadPool.class)); + return new SegmentReplicationPressureService( + settings, + clusterService, + indicesService, + shardStateAction, + new SegmentReplicationStatsTracker(indicesService), + mock(ThreadPool.class) + ); } } diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java new file mode 100644 index 0000000000000..04423d583e8f9 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.OpenSearchTestCase; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; + +public class SegmentReplicationStatsTrackerTests extends OpenSearchTestCase { + + private IndicesService indicesService = mock(IndicesService.class); + + public void testRejectedCount() { + SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); + + // Verify that total rejection count is 0 on an empty rejectionCount map in statsTracker. + assertTrue(segmentReplicationStatsTracker.getRejectionCount().isEmpty()); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 0L); + + // Verify that total rejection count is 1 after incrementing rejectionCount. 
+ segmentReplicationStatsTracker.incrementRejectionCount(Mockito.mock(ShardId.class)); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 1L); + } + +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 710717532ceb4..b7a2baacba611 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -178,6 +178,7 @@ import org.opensearch.gateway.TransportNodesListGatewayStartedShards; import org.opensearch.index.IndexingPressureService; import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.remote.RemoteStorePressureService; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -2188,6 +2189,7 @@ public void onFailure(final Exception e) { clusterService, mock(IndicesService.class), mock(ShardStateAction.class), + mock(SegmentReplicationStatsTracker.class), mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 60a54110fd0b4..2ba4de5e54a67 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -122,6 +122,7 @@ List adjustNodesStats(List nodesStats) { nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), nodeStats.getSearchPipelineStats(), + nodeStats.getSegmentReplicationRejectionStats(), nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); diff --git 
a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 898e125b94954..63d8f069bebea 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2722,6 +2722,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat( From 7c5a806d5bbee77c0c4a184a500bf5522a8d8cd7 Mon Sep 17 00:00:00 2001 From: Movva Ajaykumar Date: Sat, 21 Oct 2023 13:31:09 +0530 Subject: [PATCH 11/13] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting (#9286) * Changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting (#9286) Signed-off-by: Ajay Kumar Movva --- CHANGELOG.md | 1 + .../TransportReplicationAction.java | 10 +- .../common/network/NetworkModule.java | 16 +- .../common/settings/ClusterSettings.java | 8 +- .../main/java/org/opensearch/node/Node.java | 19 +- .../AdmissionControlService.java | 104 +++++++++ .../AdmissionControlSettings.java | 83 ++++++++ .../controllers/AdmissionController.java | 70 ++++++ .../CPUBasedAdmissionController.java | 55 +++++ .../controllers/package-info.java | 12 ++ .../enums/AdmissionControlMode.java | 66 ++++++ .../enums/TransportActionType.java | 45 ++++ .../admissioncontrol/enums/package-info.java | 12 ++ .../admissioncontrol/package-info.java | 12 ++ .../CPUBasedAdmissionControllerSettings.java | 110 ++++++++++ .../settings/package-info.java | 11 + .../AdmissionControlTransportHandler.java | 65 ++++++ .../AdmissionControlTransportInterceptor.java | 40 ++++ .../transport/package-info.java | 11 + .../ratelimitting/package-info.java | 12 ++ .../common/network/NetworkModuleTests.java | 199 +++++++++++++++++- .../AdmissionControlServiceTests.java | 140 ++++++++++++ .../AdmissionControlSettingsTests.java | 103 +++++++++ 
.../CPUBasedAdmissionControllerTests.java | 109 ++++++++++ .../enums/AdmissionControlModeTests.java | 29 +++ .../enums/TransportActionTypeTests.java | 27 +++ ...CPUBasedAdmissionControlSettingsTests.java | 153 ++++++++++++++ ...AdmissionControlTransportHandlerTests.java | 92 ++++++++ 28 files changed, 1595 insertions(+), 19 deletions(-) create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java create mode 100644 
server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/package-info.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d7d492679c79d..374dd4ab57ee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote cluster state] Upload global metadata in cluster state to remote store([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) - [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) - [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) +- [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) ### Dependencies - Bump 
`log4j-core` from 2.18.0 to 2.19.0 diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java index b68bd13cfed80..ddebdc5530e70 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java @@ -134,6 +134,12 @@ public abstract class TransportReplicationAction< Setting.Property.NodeScope ); + /** + * Making primary and replica actions suffixes as constant + */ + public static final String PRIMARY_ACTION_SUFFIX = "[p]"; + public static final String REPLICA_ACTION_SUFFIX = "[r]"; + protected final ThreadPool threadPool; protected final TransportService transportService; protected final ClusterService clusterService; @@ -204,8 +210,8 @@ protected TransportReplicationAction( this.shardStateAction = shardStateAction; this.executor = executor; - this.transportPrimaryAction = actionName + "[p]"; - this.transportReplicaAction = actionName + "[r]"; + this.transportPrimaryAction = actionName + PRIMARY_ACTION_SUFFIX; + this.transportReplicaAction = actionName + REPLICA_ACTION_SUFFIX; this.initialRetryBackoffBound = REPLICATION_INITIAL_RETRY_BACKOFF_BOUND.get(settings); this.retryTimeout = REPLICATION_RETRY_TIMEOUT.get(settings); diff --git a/server/src/main/java/org/opensearch/common/network/NetworkModule.java b/server/src/main/java/org/opensearch/common/network/NetworkModule.java index 0734659d8ee72..821d48fccf48c 100644 --- a/server/src/main/java/org/opensearch/common/network/NetworkModule.java +++ b/server/src/main/java/org/opensearch/common/network/NetworkModule.java @@ -131,7 +131,7 @@ public final class NetworkModule { private final Map> transportFactories = new HashMap<>(); private final Map> transportHttpFactories = new HashMap<>(); - private final List transportIntercetors = new 
ArrayList<>(); + private final List transportInterceptors = new ArrayList<>(); /** * Creates a network module that custom networking classes can be plugged into. @@ -149,9 +149,13 @@ public NetworkModule( NetworkService networkService, HttpServerTransport.Dispatcher dispatcher, ClusterSettings clusterSettings, - Tracer tracer + Tracer tracer, + List transportInterceptors ) { this.settings = settings; + if (transportInterceptors != null) { + transportInterceptors.forEach(this::registerTransportInterceptor); + } for (NetworkPlugin plugin : plugins) { Map> httpTransportFactory = plugin.getHttpTransports( settings, @@ -180,11 +184,11 @@ public NetworkModule( for (Map.Entry> entry : transportFactory.entrySet()) { registerTransport(entry.getKey(), entry.getValue()); } - List transportInterceptors = plugin.getTransportInterceptors( + List pluginTransportInterceptors = plugin.getTransportInterceptors( namedWriteableRegistry, threadPool.getThreadContext() ); - for (TransportInterceptor interceptor : transportInterceptors) { + for (TransportInterceptor interceptor : pluginTransportInterceptors) { registerTransportInterceptor(interceptor); } } @@ -264,7 +268,7 @@ public Supplier getTransportSupplier() { * Registers a new {@link TransportInterceptor} */ private void registerTransportInterceptor(TransportInterceptor interceptor) { - this.transportIntercetors.add(Objects.requireNonNull(interceptor, "interceptor must not be null")); + this.transportInterceptors.add(Objects.requireNonNull(interceptor, "interceptor must not be null")); } /** @@ -272,7 +276,7 @@ private void registerTransportInterceptor(TransportInterceptor interceptor) { * @see #registerTransportInterceptor(TransportInterceptor) */ public TransportInterceptor getTransportInterceptor() { - return new CompositeTransportInterceptor(this.transportIntercetors); + return new CompositeTransportInterceptor(this.transportInterceptors); } static final class CompositeTransportInterceptor implements TransportInterceptor { diff 
--git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 76883c200542e..7ac7da819b215 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -134,6 +134,8 @@ import org.opensearch.persistent.PersistentTasksClusterService; import org.opensearch.persistent.decider.EnableAssignmentDecider; import org.opensearch.plugins.PluginsService; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.rest.BaseRestHandler; import org.opensearch.script.ScriptService; @@ -682,7 +684,11 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, - IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING + IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, + AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT, + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT ) ) ); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 711a90d424ac3..e80b768074fc7 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -197,6 +197,8 @@ import org.opensearch.plugins.SearchPlugin; import org.opensearch.plugins.SystemIndexPlugin; import 
org.opensearch.plugins.TelemetryPlugin; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.ratelimitting.admissioncontrol.transport.AdmissionControlTransportInterceptor; import org.opensearch.repositories.RepositoriesModule; import org.opensearch.repositories.RepositoriesService; import org.opensearch.rest.RestController; @@ -820,6 +822,7 @@ protected Node( remoteStoreStatsTrackerFactory, recoverySettings ); + final AliasValidator aliasValidator = new AliasValidator(); final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService, systemIndices); @@ -891,6 +894,17 @@ protected Node( final RestController restController = actionModule.getRestController(); + final AdmissionControlService admissionControlService = new AdmissionControlService( + settings, + clusterService.getClusterSettings(), + threadPool + ); + + AdmissionControlTransportInterceptor admissionControlTransportInterceptor = new AdmissionControlTransportInterceptor( + admissionControlService + ); + + List transportInterceptors = List.of(admissionControlTransportInterceptor); final NetworkModule networkModule = new NetworkModule( settings, pluginsService.filterPlugins(NetworkPlugin.class), @@ -903,8 +917,10 @@ protected Node( networkService, restController, clusterService.getClusterSettings(), - tracer + tracer, + transportInterceptors ); + Collection>> indexTemplateMetadataUpgraders = pluginsService.filterPlugins( Plugin.class ).stream().map(Plugin::getIndexTemplateMetadataUpgrader).collect(Collectors.toList()); @@ -1181,6 +1197,7 @@ protected Node( b.bind(IndexingPressureService.class).toInstance(indexingPressureService); b.bind(TaskResourceTrackingService.class).toInstance(taskResourceTrackingService); b.bind(SearchBackpressureService.class).toInstance(searchBackpressureService); + b.bind(AdmissionControlService.class).toInstance(admissionControlService); b.bind(UsageService.class).toInstance(usageService); 
b.bind(AggregationUsageService.class).toInstance(searchModule.getValuesSourceRegistry().getUsageService()); b.bind(NamedWriteableRegistry.class).toInstance(namedWriteableRegistry); diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java new file mode 100644 index 0000000000000..2cc409b0e4465 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.threadpool.ThreadPool; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import static org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER; + +/** + * Admission control Service that bootstraps and manages all the Admission Controllers in OpenSearch. 
+ */ +public class AdmissionControlService { + private final ThreadPool threadPool; + public final AdmissionControlSettings admissionControlSettings; + private final ConcurrentMap ADMISSION_CONTROLLERS; + private static final Logger logger = LogManager.getLogger(AdmissionControlService.class); + private final ClusterSettings clusterSettings; + private final Settings settings; + + /** + * + * @param settings Immutable settings instance + * @param clusterSettings ClusterSettings Instance + * @param threadPool ThreadPool Instance + */ + public AdmissionControlService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + this.threadPool = threadPool; + this.admissionControlSettings = new AdmissionControlSettings(clusterSettings, settings); + this.ADMISSION_CONTROLLERS = new ConcurrentHashMap<>(); + this.clusterSettings = clusterSettings; + this.settings = settings; + this.initialise(); + } + + /** + * Initialise and Register all the admissionControllers + */ + private void initialise() { + // Initialise different type of admission controllers + registerAdmissionController(CPU_BASED_ADMISSION_CONTROLLER); + } + + /** + * Handler to trigger registered admissionController + */ + public void applyTransportAdmissionControl(String action) { + this.ADMISSION_CONTROLLERS.forEach((name, admissionController) -> { admissionController.apply(action); }); + } + + /** + * + * @param admissionControllerName admissionControllerName to register into the service. 
+ */ + public void registerAdmissionController(String admissionControllerName) { + AdmissionController admissionController = this.controllerFactory(admissionControllerName); + this.ADMISSION_CONTROLLERS.put(admissionControllerName, admissionController); + } + + /** + * @return AdmissionController Instance + */ + private AdmissionController controllerFactory(String admissionControllerName) { + switch (admissionControllerName) { + case CPU_BASED_ADMISSION_CONTROLLER: + return new CPUBasedAdmissionController(admissionControllerName, this.settings, this.clusterSettings); + default: + throw new IllegalArgumentException("Not Supported AdmissionController : " + admissionControllerName); + } + } + + /** + * + * @return list of the registered admissionControllers + */ + public List getAdmissionControllers() { + return new ArrayList<>(this.ADMISSION_CONTROLLERS.values()); + } + + /** + * + * @param controllerName name of the admissionController + * @return the AdmissionController registered under that name, or null if none is registered + */ + public AdmissionController getAdmissionController(String controllerName) { + return this.ADMISSION_CONTROLLERS.getOrDefault(controllerName, null); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java new file mode 100644 index 0000000000000..b557190ab54ac --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; + +/** + * Settings related to admission control. + * @opensearch.internal + */ +public final class AdmissionControlSettings { + + /** + * Default parameters for the AdmissionControlSettings + */ + public static class Defaults { + public static final String MODE = "disabled"; + } + + /** + * Feature level setting to operate in shadow-mode or in enforced-mode. If enforced mode is set, + * rejection will be performed; otherwise only rejection metrics will be populated. + */ + public static final Setting ADMISSION_CONTROL_TRANSPORT_LAYER_MODE = new Setting<>( + "admission_control.transport.mode", + Defaults.MODE, + AdmissionControlMode::fromName, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + private volatile AdmissionControlMode transportLayeradmissionControlMode; + + /** + * @param clusterSettings clusterSettings Instance + * @param settings settings instance + */ + public AdmissionControlSettings(ClusterSettings clusterSettings, Settings settings) { + this.transportLayeradmissionControlMode = ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.get(settings); + clusterSettings.addSettingsUpdateConsumer(ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, this::setAdmissionControlTransportLayerMode); + } + + /** + * + * @param admissionControlMode the new mode of the admission control feature + */ + private void setAdmissionControlTransportLayerMode(AdmissionControlMode admissionControlMode) { + this.transportLayeradmissionControlMode = admissionControlMode; + } + + /** + * + * @return the current transport layer mode of the admissionControl + */ + public AdmissionControlMode getAdmissionControlTransportLayerMode() { + return this.transportLayeradmissionControlMode; + } + + /** + * + *
@return true based on the admission control feature is enforced else false + */ + public Boolean isTransportLayerAdmissionControlEnforced() { + return this.transportLayeradmissionControlMode == AdmissionControlMode.ENFORCED; + } + + /** + * + * @return true based on the admission control feature is enabled else false + */ + public Boolean isTransportLayerAdmissionControlEnabled() { + return this.transportLayeradmissionControlMode != AdmissionControlMode.DISABLED; + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java new file mode 100644 index 0000000000000..00564a9967f31 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.controllers; + +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Abstract class for Admission Controller in OpenSearch, which aims to provide resource based request admission control. 
+ * It provides methods for any tracking-object that can be incremented (such as memory size), + * and admission control can be applied if configured limit has been reached + */ +public abstract class AdmissionController { + + private final AtomicLong rejectionCount; + private final String admissionControllerName; + + /** + * + * @param rejectionCount initialised rejectionCount value for AdmissionController + * @param admissionControllerName name of the admissionController + */ + public AdmissionController(AtomicLong rejectionCount, String admissionControllerName) { + this.rejectionCount = rejectionCount; + this.admissionControllerName = admissionControllerName; + } + + /** + * Return the current state of the admission controller + * @return true if admissionController is enabled for the transport layer else false + */ + public boolean isEnabledForTransportLayer(AdmissionControlMode admissionControlMode) { + return admissionControlMode != AdmissionControlMode.DISABLED; + } + + /** + * Increment the tracking-objects and apply the admission control if threshold is breached. + * Mostly applicable while applying admission controller + */ + public abstract void apply(String action); + + /** + * @return name of the admission-controller + */ + public String getName() { + return this.admissionControllerName; + } + + /** + * Adds the rejection count for the controller. Primarily used when copying controller states. + * @param count To add the value of the tracking resource object as the provided count + */ + public void addRejectionCount(long count) { + this.rejectionCount.addAndGet(count); + } + + /** + * @return current value of the rejection count metric tracked by the admission-controller. 
+ */ + public long getRejectionCount() { + return this.rejectionCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java new file mode 100644 index 0000000000000..3a8956b2cce87 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.controllers; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Class for CPU Based Admission Controller in OpenSearch, which aims to provide CPU utilisation admission control. 
+ * It provides methods to apply admission control if configured limit has been reached + */ +public class CPUBasedAdmissionController extends AdmissionController { + private static final Logger LOGGER = LogManager.getLogger(CPUBasedAdmissionController.class); + public CPUBasedAdmissionControllerSettings settings; + + /** + * + * @param admissionControllerName State of the admission controller + */ + public CPUBasedAdmissionController(String admissionControllerName, Settings settings, ClusterSettings clusterSettings) { + super(new AtomicLong(0), admissionControllerName); + this.settings = new CPUBasedAdmissionControllerSettings(clusterSettings, settings); + } + + /** + * This function will take of applying admission controller based on CPU usage + * @param action is the transport action + */ + @Override + public void apply(String action) { + // TODO Will extend this logic further currently just incrementing rejectionCount + if (this.isEnabledForTransportLayer(this.settings.getTransportLayerAdmissionControllerMode())) { + this.applyForTransportLayer(action); + } + } + + private void applyForTransportLayer(String actionName) { + // currently incrementing counts to evaluate the controller triggering as expected and using in testing so limiting to 10 + // TODO will update rejection logic further in next PR's + if (this.getRejectionCount() < 10) { + this.addRejectionCount(1); + } + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java new file mode 100644 index 0000000000000..23746cc61a203 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open 
/**
 * Modes the admission control feature can operate in.
 */
public enum AdmissionControlMode {
    /**
     * AdmissionController is completely disabled.
     */
    DISABLED("disabled"),

    /**
     * AdmissionController only monitors the rejection criteria for the requests.
     */
    MONITOR("monitor_only"),

    /**
     * AdmissionController monitors and rejects tasks that exceed resource usage thresholds.
     */
    ENFORCED("enforced");

    private final String mode;

    /**
     * @param mode textual name of the mode
     */
    AdmissionControlMode(String mode) {
        this.mode = mode;
    }

    /**
     * @return textual name of this mode
     */
    public String getMode() {
        return mode;
    }

    /**
     * Resolves a mode from its textual name, ignoring case.
     *
     * @param name textual name of the mode
     * @return the AdmissionControlMode carrying that name
     * @throws IllegalArgumentException if no mode carries that name
     */
    public static AdmissionControlMode fromName(String name) {
        final String normalized = name.toLowerCase(Locale.ROOT);
        for (AdmissionControlMode candidate : values()) {
            if (candidate.mode.equals(normalized)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Invalid AdmissionControlMode: " + name);
    }
}
/**
 * Types of transport requests known to admission control.
 */
public enum TransportActionType {
    INDEXING("indexing"),
    SEARCH("search");

    private final String type;

    TransportActionType(String uriType) {
        type = uriType;
    }

    /**
     * @return type of the request
     */
    public String getType() {
        return type;
    }

    /**
     * Resolves a request type from its textual name, ignoring case.
     *
     * @param name textual name of the request type
     * @return the TransportActionType carrying that name
     * @throws IllegalArgumentException if no type carries that name; the message shows the lower-cased name
     */
    public static TransportActionType fromName(String name) {
        final String lowered = name.toLowerCase(Locale.ROOT);
        for (TransportActionType candidate : values()) {
            if (candidate.type.equals(lowered)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Not Supported TransportAction Type: " + lowered);
    }
}
+ */ + +/** + * This package contains enums related to the different admission controller feature + */ +package org.opensearch.ratelimitting.admissioncontrol.enums; diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java new file mode 100644 index 0000000000000..b3dc229f86fb6 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * This package contains base classes needed for the admissionController Feature + */ +package org.opensearch.ratelimitting.admissioncontrol; diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java new file mode 100644 index 0000000000000..141e9b68db145 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.settings; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; + +import java.util.Arrays; +import java.util.List; + +/** + * Settings related to cpu based admission controller. + * @opensearch.internal + */ +public class CPUBasedAdmissionControllerSettings { + public static final String CPU_BASED_ADMISSION_CONTROLLER = "global_cpu_usage"; + + /** + * Default parameters for the CPUBasedAdmissionControllerSettings + */ + public static class Defaults { + public static final long CPU_USAGE = 95; + public static List TRANSPORT_LAYER_DEFAULT_URI_TYPE = Arrays.asList("indexing", "search"); + } + + private AdmissionControlMode transportLayerMode; + private Long searchCPULimit; + private Long indexingCPULimit; + + private final List transportActionsList; + /** + * Feature level setting to operate in shadow-mode or in enforced-mode. If enforced field is set + * rejection will be performed, otherwise only rejection metrics will be populated. 
+ */ + public static final Setting CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE = new Setting<>( + "admission_control.transport.cpu_usage.mode_override", + AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, + AdmissionControlMode::fromName, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * This setting used to set the CPU Limits for the search requests by default it will use default IO usage limit + */ + public static final Setting SEARCH_CPU_USAGE_LIMIT = Setting.longSetting( + "admission_control.search.cpu_usage.limit", + Defaults.CPU_USAGE, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * This setting used to set the CPU limits for the indexing requests by default it will use default IO usage limit + */ + public static final Setting INDEXING_CPU_USAGE_LIMIT = Setting.longSetting( + "admission_control.indexing.cpu_usage.limit", + Defaults.CPU_USAGE, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + // currently limited to one setting will add further more settings in follow-up PR's + public CPUBasedAdmissionControllerSettings(ClusterSettings clusterSettings, Settings settings) { + this.transportLayerMode = CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.get(settings); + clusterSettings.addSettingsUpdateConsumer(CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, this::setTransportLayerMode); + this.searchCPULimit = SEARCH_CPU_USAGE_LIMIT.get(settings); + this.indexingCPULimit = INDEXING_CPU_USAGE_LIMIT.get(settings); + this.transportActionsList = Defaults.TRANSPORT_LAYER_DEFAULT_URI_TYPE; + clusterSettings.addSettingsUpdateConsumer(INDEXING_CPU_USAGE_LIMIT, this::setIndexingCPULimit); + clusterSettings.addSettingsUpdateConsumer(SEARCH_CPU_USAGE_LIMIT, this::setSearchCPULimit); + } + + private void setTransportLayerMode(AdmissionControlMode admissionControlMode) { + this.transportLayerMode = admissionControlMode; + } + + public AdmissionControlMode 
getTransportLayerAdmissionControllerMode() { + return transportLayerMode; + } + + public Long getSearchCPULimit() { + return searchCPULimit; + } + + public Long getIndexingCPULimit() { + return indexingCPULimit; + } + + public void setIndexingCPULimit(Long indexingCPULimit) { + this.indexingCPULimit = indexingCPULimit; + } + + public void setSearchCPULimit(Long searchCPULimit) { + this.searchCPULimit = searchCPULimit; + } + + public List getTransportActionsList() { + return transportActionsList; + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java new file mode 100644 index 0000000000000..a024ccc756745 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java @@ -0,0 +1,11 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +/** + * This package contains settings related classes for the different admission controllers + */ +package org.opensearch.ratelimitting.admissioncontrol.settings; diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java new file mode 100644 index 0000000000000..7d0f5fbc17a51 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.transport; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.tasks.Task; +import org.opensearch.transport.TransportChannel; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportRequestHandler; + +/** + * AdmissionControl Handler to intercept Transport Requests. + * @param Transport Request + */ +public class AdmissionControlTransportHandler implements TransportRequestHandler { + + private final String action; + private final TransportRequestHandler actualHandler; + protected final Logger log = LogManager.getLogger(this.getClass()); + AdmissionControlService admissionControlService; + boolean forceExecution; + + public AdmissionControlTransportHandler( + String action, + TransportRequestHandler actualHandler, + AdmissionControlService admissionControlService, + boolean forceExecution + ) { + super(); + this.action = action; + this.actualHandler = actualHandler; + this.admissionControlService = admissionControlService; + this.forceExecution = forceExecution; + } + + /** + * @param request Transport Request that landed on the node + * @param channel Transport channel allows to send a response to a request + * @param task Current task that is executing + * @throws Exception when admission control rejected the requests + */ + @Override + public void messageReceived(T request, TransportChannel channel, Task task) throws Exception { + // intercept all the transport requests here and apply admission control + try { + // TODO Need to evaluate if we need to apply admission control or not if force Execution is true will update in next PR. 
+ this.admissionControlService.applyTransportAdmissionControl(this.action); + } catch (final OpenSearchRejectedExecutionException openSearchRejectedExecutionException) { + log.warn(openSearchRejectedExecutionException.getMessage()); + channel.sendResponse(openSearchRejectedExecutionException); + } catch (final Exception e) { + throw e; + } + actualHandler.messageReceived(request, channel, task); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java new file mode 100644 index 0000000000000..01cfcbd780006 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.transport; + +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.transport.TransportInterceptor; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportRequestHandler; + +/** + * This class allows throttling to intercept requests on both the sender and the receiver side. 
+ */ +public class AdmissionControlTransportInterceptor implements TransportInterceptor { + + AdmissionControlService admissionControlService; + + public AdmissionControlTransportInterceptor(AdmissionControlService admissionControlService) { + this.admissionControlService = admissionControlService; + } + + /** + * + * @return admissionController handler to intercept transport requests + */ + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + return new AdmissionControlTransportHandler<>(action, actualHandler, this.admissionControlService, forceExecution); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java new file mode 100644 index 0000000000000..f97f31bc7b1db --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java @@ -0,0 +1,11 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +/** + * This package contains transport related classes for the admissionController Feature + */ +package org.opensearch.ratelimitting.admissioncontrol.transport; diff --git a/server/src/main/java/org/opensearch/ratelimitting/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/package-info.java new file mode 100644 index 0000000000000..c04358e14284f --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * Base OpenSearch Throttling package + */ +package org.opensearch.ratelimitting; diff --git a/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java b/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java index 0ca118fe422a5..ab51cafb039c2 100644 --- a/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java +++ b/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java @@ -57,6 +57,7 @@ import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -124,7 +125,7 @@ public Map> getTransports( return Collections.singletonMap("custom", custom); } }; - NetworkModule module = newNetworkModule(settings, plugin); + NetworkModule module = newNetworkModule(settings, null, plugin); assertSame(custom, module.getTransportSupplier()); } @@ -135,7 +136,7 @@ public void testRegisterHttpTransport() { .build(); Supplier custom = FakeHttpTransport::new; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getHttpTransports( Settings settings, @@ -155,7 +156,7 @@ public Map> getHttpTransports( assertSame(custom, module.getHttpServerTransportSupplier()); settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); - NetworkModule newModule = newNetworkModule(settings); + NetworkModule newModule = newNetworkModule(settings, null); expectThrows(IllegalStateException.class, () -> newModule.getHttpServerTransportSupplier()); } @@ -169,7 +170,7 @@ public void testOverrideDefault() { Supplier customTransport = () -> null; // content doesn't matter we check reference equality Supplier custom = FakeHttpTransport::new; Supplier def = FakeHttpTransport::new; - NetworkModule module = 
newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getTransports( Settings settings, @@ -214,7 +215,7 @@ public void testDefaultKeys() { Supplier custom = FakeHttpTransport::new; Supplier def = FakeHttpTransport::new; Supplier customTransport = () -> null; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getTransports( Settings settings, @@ -273,7 +274,7 @@ public TransportRequestHandler interceptHandler( return actualHandler; } }; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public List getTransportInterceptors( NamedWriteableRegistry namedWriteableRegistry, @@ -295,7 +296,7 @@ public List getTransportInterceptors( assertSame(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.get(0), interceptor); NullPointerException nullPointerException = expectThrows(NullPointerException.class, () -> { - newNetworkModule(settings, new NetworkPlugin() { + newNetworkModule(settings, null, new NetworkPlugin() { @Override public List getTransportInterceptors( NamedWriteableRegistry namedWriteableRegistry, @@ -309,7 +310,186 @@ public List getTransportInterceptors( assertEquals("interceptor must not be null", nullPointerException.getMessage()); } - private NetworkModule newNetworkModule(Settings settings, NetworkPlugin... 
plugins) { + public void testRegisterCoreInterceptor() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(0, called.get()); + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + assertEquals(1, called.get()); + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + assertEquals(2, called.get()); + assertTrue(transportInterceptor instanceof NetworkModule.CompositeTransportInterceptor); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 1); + assertSame(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.get(0), interceptor); + } + + public void testInterceptorOrder() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + AtomicInteger called1 = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if 
("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + TransportInterceptor interceptor1 = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called1.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor1); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors, new NetworkPlugin() { + @Override + public List getTransportInterceptors( + NamedWriteableRegistry namedWriteableRegistry, + ThreadContext threadContext + ) { + assertNotNull(threadContext); + return Collections.singletonList(interceptor); + } + }); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 2); + + assertEquals(0, called.get()); + assertEquals(0, called1.get()); + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + assertEquals(1, called.get()); + assertEquals(1, called1.get()); + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + assertEquals(2, called.get()); + assertEquals(2, called1.get()); + } + + public void testInterceptorOrderException() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + AtomicInteger called1 = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + 
TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + TransportInterceptor interceptor1 = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called1.incrementAndGet(); + throw new RuntimeException("Handler Invoke Failed"); + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor1); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors, new NetworkPlugin() { + @Override + public List getTransportInterceptors( + NamedWriteableRegistry namedWriteableRegistry, + ThreadContext threadContext + ) { + assertNotNull(threadContext); + return Collections.singletonList(interceptor); + } + }); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 2); + + assertEquals(0, called.get()); + assertEquals(0, called1.get()); + try { + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + } catch (Exception e) { + assertEquals(0, called.get()); + assertEquals(1, called1.get()); + } + try { + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + } catch (Exception e) { + assertEquals(0, called.get()); + assertEquals(2, called1.get()); + } + } + + private NetworkModule newNetworkModule( + Settings settings, + List coreTransportInterceptors, + NetworkPlugin... plugins + ) { return new NetworkModule( settings, Arrays.asList(plugins), @@ -322,7 +502,8 @@ private NetworkModule newNetworkModule(Settings settings, NetworkPlugin... 
plugi null, new NullDispatcher(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - NoopTracer.INSTANCE + NoopTracer.INSTANCE, + coreTransportInterceptors ); } } diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java new file mode 100644 index 0000000000000..bac4eaf3fd677 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; + +public class AdmissionControlServiceTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + private AdmissionControlService admissionControlService; + private String action = ""; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + 
Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + action = "indexing"; + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testWhenAdmissionControllerRegistered() { + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + assertEquals(admissionControlService.getAdmissionControllers().size(), 1); + } + + public void testRegisterInvalidAdmissionController() { + String test = "TEST"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + assertEquals(admissionControlService.getAdmissionControllers().size(), 1); + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> admissionControlService.registerAdmissionController(test) + ); + assertEquals(ex.getMessage(), "Not Supported AdmissionController : " + test); + } + + public void testAdmissionControllerSettings() { + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + AdmissionControlSettings admissionControlSettings = admissionControlService.admissionControlSettings; + List admissionControllerList = admissionControlService.getAdmissionControllers(); + assertEquals(admissionControllerList.size(), 1); + CPUBasedAdmissionController cpuBasedAdmissionController = (CPUBasedAdmissionController) admissionControlService + .getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals( + admissionControlSettings.isTransportLayerAdmissionControlEnabled(), + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + + Settings settings = Settings.builder() + 
.put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.DISABLED.getMode()) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertEquals( + admissionControlSettings.isTransportLayerAdmissionControlEnabled(), + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + + Settings newSettings = Settings.builder() + .put(settings) + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(newSettings); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue( + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + } + + public void testApplyAdmissionControllerDisabled() { + this.action = "indices:data/write/bulk[s][p]"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService.applyTransportAdmissionControl(this.action); + List admissionControllerList = admissionControlService.getAdmissionControllers(); + admissionControllerList.forEach(admissionController -> { assertEquals(admissionController.getRejectionCount(), 0); }); + } + + public void testApplyAdmissionControllerEnabled() { + this.action = "indices:data/write/bulk[s][p]"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService.applyTransportAdmissionControl(this.action); + assertEquals( + admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) + 
.getRejectionCount(), + 0 + ); + + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.MONITOR.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(settings); + admissionControlService.applyTransportAdmissionControl(this.action); + List admissionControllerList = admissionControlService.getAdmissionControllers(); + assertEquals(admissionControllerList.size(), 1); + assertEquals( + admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) + .getRejectionCount(), + 1 + ); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java new file mode 100644 index 0000000000000..c11ee1cc608f6 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; +import java.util.Set; + +public class AdmissionControlSettingsTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testSettingsExists() { + Set> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS; + assertTrue( + "All the admission controller settings should be supported built in settings", + settings.containsAll(List.of(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE)) + ); + } + + public void testDefaultSettings() { + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings( + clusterService.getClusterSettings(), + Settings.EMPTY + ); + + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + assertEquals(admissionControlSettings.getAdmissionControlTransportLayerMode().getMode(), AdmissionControlSettings.Defaults.MODE); + } + + public void testGetConfiguredSettings() { + Settings settings = Settings.builder() + 
.put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.ENFORCED.getMode()) + .build(); + + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings); + + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + } + + public void testUpdateAfterGetDefaultSettings() { + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings( + clusterService.getClusterSettings(), + Settings.EMPTY + ); + Settings settings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode()) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + } + + public void testUpdateAfterGetConfiguredSettings() { + Settings settings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode()) + .build(); + + AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings); + + Settings newSettings = Settings.builder() + .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.ENFORCED.getMode()) + .build(); + + clusterService.getClusterSettings().applySettings(newSettings); + + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnforced()); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java 
b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java new file mode 100644 index 0000000000000..af6ec0749e709 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.controllers; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +public class CPUBasedAdmissionControllerTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + CPUBasedAdmissionController admissionController = null; + + String action = "TEST_ACTION"; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testCheckDefaultParameters() { + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + Settings.EMPTY, + clusterService.getClusterSettings() + ); + 
assertEquals(admissionController.getName(), CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); + assertFalse( + admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode()) + ); + } + + public void testCheckUpdateSettings() { + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + Settings.EMPTY, + clusterService.getClusterSettings() + ); + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(settings); + + assertEquals(admissionController.getName(), CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())); + } + + public void testApplyControllerWithDefaultSettings() { + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + Settings.EMPTY, + clusterService.getClusterSettings() + ); + assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); + action = "indices:data/write/bulk[s][p]"; + admissionController.apply(action); + assertEquals(admissionController.getRejectionCount(), 0); + } + + public void 
testApplyControllerWhenSettingsEnabled() { + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + admissionController = new CPUBasedAdmissionController( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, + settings, + clusterService.getClusterSettings() + ); + assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())); + assertEquals(admissionController.getRejectionCount(), 0); + action = "indices:data/write/bulk[s][p]"; + admissionController.apply(action); + assertEquals(admissionController.getRejectionCount(), 1); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java new file mode 100644 index 0000000000000..98c0f3c7cf24c --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.enums; + +import org.opensearch.test.OpenSearchTestCase; + +public class AdmissionControlModeTests extends OpenSearchTestCase { + + public void testValidActionType() { + assertEquals(AdmissionControlMode.DISABLED.getMode(), "disabled"); + assertEquals(AdmissionControlMode.ENFORCED.getMode(), "enforced"); + assertEquals(AdmissionControlMode.MONITOR.getMode(), "monitor_only"); + assertEquals(AdmissionControlMode.fromName("disabled"), AdmissionControlMode.DISABLED); + assertEquals(AdmissionControlMode.fromName("enforced"), AdmissionControlMode.ENFORCED); + assertEquals(AdmissionControlMode.fromName("monitor_only"), AdmissionControlMode.MONITOR); + } + + public void testInValidActionType() { + String name = "TEST"; + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> AdmissionControlMode.fromName(name)); + assertEquals(ex.getMessage(), "Invalid AdmissionControlMode: " + name); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java new file mode 100644 index 0000000000000..02f582c26f54e --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.enums; + +import org.opensearch.test.OpenSearchTestCase; + +public class TransportActionTypeTests extends OpenSearchTestCase { + + public void testValidActionType() { + assertEquals(TransportActionType.SEARCH.getType(), "search"); + assertEquals(TransportActionType.INDEXING.getType(), "indexing"); + assertEquals(TransportActionType.fromName("search"), TransportActionType.SEARCH); + assertEquals(TransportActionType.fromName("indexing"), TransportActionType.INDEXING); + } + + public void testInValidActionType() { + String name = "test"; + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> TransportActionType.fromName(name)); + assertEquals(ex.getMessage(), "Not Supported TransportAction Type: " + name); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java new file mode 100644 index 0000000000000..43103926a69a2 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java @@ -0,0 +1,153 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.settings; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.Arrays; +import java.util.Set; + +public class CPUBasedAdmissionControlSettingsTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testSettingsExists() { + Set> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS; + assertTrue( + "All the cpu based admission controller settings should be supported built in settings", + settings.containsAll( + Arrays.asList( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT, + CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT + ) + ) + ); + } + + public void testDefaultSettings() { + CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings( + clusterService.getClusterSettings(), + Settings.EMPTY + ); + long percent = 95; + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); + 
assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), percent); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), percent); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportActionsList(), Arrays.asList("indexing", "search")); + } + + public void testGetConfiguredSettings() { + long percent = 95; + long indexingPercent = 85; + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), indexingPercent) + .build(); + + CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings( + clusterService.getClusterSettings(), + settings + ); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), percent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), indexingPercent); + } + + public void testUpdateAfterGetDefaultSettings() { + long percent = 95; + long searchPercent = 80; + CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings( + clusterService.getClusterSettings(), + Settings.EMPTY + ); + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + 
assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), percent); + } + + public void testUpdateAfterGetConfiguredSettings() { + long percent = 95; + long indexingPercent = 85; + long searchPercent = 80; + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent) + .build(); + + CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings( + clusterService.getClusterSettings(), + settings + ); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), percent); + + Settings updatedSettings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.MONITOR.getMode() + ) + .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), indexingPercent) + .build(); + clusterService.getClusterSettings().applySettings(updatedSettings); + assertEquals(cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.MONITOR); + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), indexingPercent); + + searchPercent = 70; + + updatedSettings = Settings.builder() + .put(updatedSettings) + 
.put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent) + .build(); + clusterService.getClusterSettings().applySettings(updatedSettings); + + assertEquals(cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue(), searchPercent); + assertEquals(cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue(), indexingPercent); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java new file mode 100644 index 0000000000000..03d4819a94045 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java @@ -0,0 +1,92 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.transport; + +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.tasks.Task; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.transport.TransportChannel; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportRequestHandler; + +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +public class AdmissionControlTransportHandlerTests extends OpenSearchTestCase { + AdmissionControlTransportHandler admissionControlTransportHandler; + + public void testHandlerInvoked() throws Exception { + String action = "TEST"; + InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); + admissionControlTransportHandler = new AdmissionControlTransportHandler( + action, + handler, + mock(AdmissionControlService.class), + false + ); + admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + assertEquals(1, handler.count); + } + + public void testHandlerInvokedRejectedException() throws Exception { + String action = "TEST"; + AdmissionControlService admissionControlService = mock(AdmissionControlService.class); + doThrow(new OpenSearchRejectedExecutionException()).when(admissionControlService).applyTransportAdmissionControl(action); + InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); + admissionControlTransportHandler = new AdmissionControlTransportHandler( + action, + handler, + admissionControlService, + false + ); + try { + admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } catch (OpenSearchRejectedExecutionException exception) { + assertEquals(0, handler.count); + 
handler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } + assertEquals(1, handler.count); + } + + public void testHandlerInvokedRandomException() throws Exception { + String action = "TEST"; + AdmissionControlService admissionControlService = mock(AdmissionControlService.class); + doThrow(new NullPointerException()).when(admissionControlService).applyTransportAdmissionControl(action); + InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); + admissionControlTransportHandler = new AdmissionControlTransportHandler( + action, + handler, + admissionControlService, + false + ); + try { + admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } catch (Exception exception) { + assertEquals(0, handler.count); + handler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } + assertEquals(1, handler.count); + } + + private class InterceptingRequestHandler implements TransportRequestHandler { + private final String action; + public int count; + + public InterceptingRequestHandler(String action) { + this.action = action; + this.count = 0; + } + + @Override + public void messageReceived(T request, TransportChannel channel, Task task) throws Exception { + this.count = this.count + 1; + } + } +} From 14d4a6389bd5a7612c14e2e6fcb5a39822af9ee0 Mon Sep 17 00:00:00 2001 From: Ashish Date: Sat, 21 Oct 2023 18:42:52 +0530 Subject: [PATCH 12/13] [Remote Store] Use time elapsed since last successful local refresh for refresh lag (#10803) * [Remote Store] Use time elapsed since last successful local refresh for time lag Signed-off-by: Ashish Singh * Incorporate PR review comments Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- ...emoteStoreBackpressureAndResiliencyIT.java | 6 ++- .../remote/RemoteSegmentTransferTracker.java | 53 ++++++++++++------- 
.../remote/RemoteStorePressureService.java | 1 - .../RemoteSegmentTransferTrackerTests.java | 39 +++++++++----- .../RemoteStorePressureServiceTests.java | 32 ++++++++--- 5 files changed, 86 insertions(+), 45 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java index 98586b60dcc69..f19c9db7874db 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java @@ -56,7 +56,7 @@ public void testWritesRejectedDueToBytesLagBreach() throws Exception { public void testWritesRejectedDueToTimeLagBreach() throws Exception { // Initially indexing happens with doc size of 1KB, then all remote store interactions start failing. Now, the // indexing happens with doc size of 1 byte leading to time lag limit getting exceeded and leading to rejections. 
- validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 15, "time_lag"); + validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 3, "time_lag"); } private void validateBackpressure( @@ -133,11 +133,13 @@ private RemoteSegmentTransferTracker.Stats stats() { return matches.get(0).getSegmentStats(); } - private void indexDocAndRefresh(BytesReference source, int iterations) { + private void indexDocAndRefresh(BytesReference source, int iterations) throws InterruptedException { for (int i = 0; i < iterations; i++) { client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); refresh(INDEX_NAME); } + Thread.sleep(250); + client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); } /** diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java index 2a703f17aa953..fb65d9ef83be2 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -66,6 +67,12 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long remoteRefreshTimeMs; + /** + * This is the time of first local refresh after the last successful remote refresh. When the remote store is in + * sync with local refresh, this will be reset to -1. + */ + private volatile long remoteRefreshStartTimeMs = -1; + /** * The refresh time(clock) of most recent remote refresh. 
*/ @@ -76,11 +83,6 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long refreshSeqNoLag; - /** - * Keeps the time (ms) lag computed so that we do not compute it for every request. - */ - private volatile long timeMsLag; - /** * Keeps track of the total bytes of segment files which were uploaded to remote store during last successful remote refresh */ @@ -132,14 +134,19 @@ public RemoteSegmentTransferTracker( logger = Loggers.getLogger(getClass(), shardId); // Both the local refresh time and remote refresh time are set with current time to give consistent view of time lag when it arises. long currentClockTimeMs = System.currentTimeMillis(); - long currentTimeMs = System.nanoTime() / 1_000_000L; + long currentTimeMs = currentTimeMsUsingSystemNanos(); localRefreshTimeMs = currentTimeMs; remoteRefreshTimeMs = currentTimeMs; + remoteRefreshStartTimeMs = currentTimeMs; localRefreshClockTimeMs = currentClockTimeMs; remoteRefreshClockTimeMs = currentClockTimeMs; this.directoryFileTransferTracker = directoryFileTransferTracker; } + public static long currentTimeMsUsingSystemNanos() { + return TimeUnit.NANOSECONDS.toMillis(System.nanoTime()); + } + @Override public void incrementTotalUploadsFailed() { super.incrementTotalUploadsFailed(); @@ -180,19 +187,22 @@ public long getLocalRefreshClockTimeMs() { */ public void updateLocalRefreshTimeAndSeqNo() { updateLocalRefreshClockTimeMs(System.currentTimeMillis()); - updateLocalRefreshTimeMs(System.nanoTime() / 1_000_000L); + updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); updateLocalRefreshSeqNo(getLocalRefreshSeqNo() + 1); } // Visible for testing - void updateLocalRefreshTimeMs(long localRefreshTimeMs) { + synchronized void updateLocalRefreshTimeMs(long localRefreshTimeMs) { assert localRefreshTimeMs >= this.localRefreshTimeMs : "newLocalRefreshTimeMs=" + localRefreshTimeMs + " < " + "currentLocalRefreshTimeMs=" + this.localRefreshTimeMs; + boolean 
isRemoteInSyncBeforeLocalRefresh = this.localRefreshTimeMs == this.remoteRefreshTimeMs; this.localRefreshTimeMs = localRefreshTimeMs; - computeTimeMsLag(); + if (isRemoteInSyncBeforeLocalRefresh) { + this.remoteRefreshStartTimeMs = localRefreshTimeMs; + } } private void updateLocalRefreshClockTimeMs(long localRefreshClockTimeMs) { @@ -221,14 +231,18 @@ long getRemoteRefreshClockTimeMs() { return remoteRefreshClockTimeMs; } - public void updateRemoteRefreshTimeMs(long remoteRefreshTimeMs) { - assert remoteRefreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" - + remoteRefreshTimeMs + public synchronized void updateRemoteRefreshTimeMs(long refreshTimeMs) { + assert refreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" + + refreshTimeMs + " < " + "currentRemoteRefreshTimeMs=" + this.remoteRefreshTimeMs; - this.remoteRefreshTimeMs = remoteRefreshTimeMs; - computeTimeMsLag(); + this.remoteRefreshTimeMs = refreshTimeMs; + // When multiple refreshes have failed, there is a possibility that retry is ongoing while another refresh gets + // triggered. After the segments have been uploaded and before the below code runs, the updateLocalRefreshTimeAndSeqNo + // method is triggered, which will update the local localRefreshTimeMs. Now, the lag would basically become the + // time since the last refresh happened locally. + this.remoteRefreshStartTimeMs = refreshTimeMs == this.localRefreshTimeMs ? 
-1 : this.localRefreshTimeMs; } public void updateRemoteRefreshClockTimeMs(long remoteRefreshClockTimeMs) { @@ -243,12 +257,11 @@ public long getRefreshSeqNoLag() { return refreshSeqNoLag; } - private void computeTimeMsLag() { - timeMsLag = localRefreshTimeMs - remoteRefreshTimeMs; - } - public long getTimeMsLag() { - return timeMsLag; + if (remoteRefreshTimeMs == localRefreshTimeMs) { + return 0; + } + return currentTimeMsUsingSystemNanos() - remoteRefreshStartTimeMs; } public long getBytesLag() { @@ -354,7 +367,7 @@ public RemoteSegmentTransferTracker.Stats stats() { shardId, localRefreshClockTimeMs, remoteRefreshClockTimeMs, - timeMsLag, + getTimeMsLag(), localRefreshSeqNo, remoteRefreshSeqNo, uploadBytesStarted.get(), diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java index 2920b33921869..33cd40f802d43 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java @@ -180,7 +180,6 @@ public boolean validate(RemoteSegmentTransferTracker pressureTracker, ShardId sh return true; } if (pressureTracker.isUploadTimeMovingAverageReady() == false) { - logger.trace("upload time moving average is not ready"); return true; } long timeLag = pressureTracker.getTimeMsLag(); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java index 0bf00f9e48137..c87cdfcc8f1a1 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java @@ -23,6 +23,8 @@ import java.util.HashMap; import java.util.Map; +import static 
org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; + public class RemoteSegmentTransferTrackerTests extends OpenSearchTestCase { private RemoteStoreStatsTrackerFactory remoteStoreStatsTrackerFactory; private ClusterService clusterService; @@ -92,7 +94,7 @@ public void testUpdateLocalRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000L + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateLocalRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getLocalRefreshTimeMs()); } @@ -103,7 +105,7 @@ public void testUpdateRemoteRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000 + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateRemoteRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getRemoteRefreshTimeMs()); } @@ -133,20 +135,29 @@ public void testComputeSeqNoLagOnUpdate() { assertEquals(localRefreshSeqNo - remoteRefreshSeqNo, transferTracker.getRefreshSeqNoLag()); } - public void testComputeTimeLagOnUpdate() { + public void testComputeTimeLagOnUpdate() throws InterruptedException { transferTracker = new RemoteSegmentTransferTracker( shardId, directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long currentLocalRefreshTimeMs = transferTracker.getLocalRefreshTimeMs(); - long currentTimeMs = System.nanoTime() / 1_000_000L; - long localRefreshTimeMs = currentTimeMs + randomIntBetween(100, 500); - long remoteRefreshTimeMs = currentTimeMs + randomIntBetween(50, 99); - transferTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); - assertEquals(localRefreshTimeMs - 
currentLocalRefreshTimeMs, transferTracker.getTimeMsLag()); - transferTracker.updateRemoteRefreshTimeMs(remoteRefreshTimeMs); - assertEquals(localRefreshTimeMs - remoteRefreshTimeMs, transferTracker.getTimeMsLag()); + + // No lag if there is a remote upload corresponding to a local refresh + assertEquals(0, transferTracker.getTimeMsLag()); + + // Set a local refresh time that is higher than remote refresh time + Thread.sleep(1); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + + // Sleep for 100ms and then the lag should be within 100ms +/- 20ms + Thread.sleep(100); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - 100) <= 20); + + transferTracker.updateRemoteRefreshTimeMs(transferTracker.getLocalRefreshTimeMs()); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + long random = randomIntBetween(50, 200); + Thread.sleep(random); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - random) <= 20); } public void testAddUploadBytesStarted() { @@ -519,7 +530,7 @@ public void testStatsObjectCreation() { transferTracker = constructTracker(); RemoteSegmentTransferTracker.Stats transferTrackerStats = transferTracker.stats(); assertEquals(transferTracker.getShardId(), transferTrackerStats.shardId); - assertEquals(transferTracker.getTimeMsLag(), (int) transferTrackerStats.refreshTimeLagMs); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - transferTrackerStats.refreshTimeLagMs) <= 20); assertEquals(transferTracker.getLocalRefreshSeqNo(), (int) transferTrackerStats.localRefreshNumber); assertEquals(transferTracker.getRemoteRefreshSeqNo(), (int) transferTrackerStats.remoteRefreshNumber); assertEquals(transferTracker.getBytesLag(), (int) transferTrackerStats.bytesLag); @@ -591,9 +602,9 @@ private RemoteSegmentTransferTracker constructTracker() { ); transferTracker.incrementTotalUploadsStarted(); transferTracker.incrementTotalUploadsFailed(); - transferTracker.updateUploadTimeMovingAverage(System.nanoTime() / 
1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateUploadTimeMovingAverage(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.updateUploadBytesMovingAverage(99); - transferTracker.updateRemoteRefreshTimeMs(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateRemoteRefreshTimeMs(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.incrementRejectionCount(); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesStarted(10); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesSucceeded(10, System.currentTimeMillis()); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java index de610083f3327..cb77174e612fd 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java @@ -21,8 +21,11 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.IntStream; +import static org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; import static org.opensearch.index.remote.RemoteStoreTestsHelper.createIndexShard; public class RemoteStorePressureServiceTests extends OpenSearchTestCase { @@ -68,7 +71,7 @@ public void testIsSegmentsUploadBackpressureEnabled() { assertTrue(pressureService.isSegmentsUploadBackpressureEnabled()); } - public void testValidateSegmentUploadLag() { + public void testValidateSegmentUploadLag() throws InterruptedException { // Create the pressure tracker IndexShard indexShard = createIndexShard(shardId, true); remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, Settings.EMPTY); @@ 
-86,14 +89,27 @@ public void testValidateSegmentUploadLag() { sum.addAndGet(i); }); double avg = (double) sum.get() / 20; - long currentMs = System.nanoTime() / 1_000_000; - pressureTracker.updateLocalRefreshTimeMs((long) (currentMs + 12 * avg)); - pressureTracker.updateRemoteRefreshTimeMs(currentMs); - Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId)); - assertTrue(e.getMessage().contains("due to remote segments lagging behind local segments")); - assertTrue(e.getMessage().contains("time_lag:114 ms dynamic_time_lag_threshold:95.0 ms")); - pressureTracker.updateRemoteRefreshTimeMs((long) (currentMs + 2 * avg)); + // We run this to ensure that the local and remote refresh time are not same anymore + while (pressureTracker.getLocalRefreshTimeMs() == currentTimeMsUsingSystemNanos()) { + Thread.sleep(10); + } + long localRefreshTimeMs = currentTimeMsUsingSystemNanos(); + pressureTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); + + while (currentTimeMsUsingSystemNanos() - localRefreshTimeMs <= 20 * avg) { + Thread.sleep((long) (4 * avg)); + } + Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId)); + String regex = "^rejected execution on primary shard:\\[index]\\[0] due to remote segments lagging behind " + + "local segments.time_lag:[0-9]{2,3} ms dynamic_time_lag_threshold:95\\.0 ms$"; + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(e.getMessage()); + assertTrue(matcher.matches()); + + pressureTracker.updateRemoteRefreshTimeMs(pressureTracker.getLocalRefreshTimeMs()); + pressureTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + Thread.sleep((long) (2 * avg)); pressureService.validateSegmentsUploadLag(shardId); // 2. 
bytes lag more than dynamic threshold From 01e320ff34f365b95daf07a01506019e46d8f3a7 Mon Sep 17 00:00:00 2001 From: Ajay Kumar Movva Date: Sat, 21 Oct 2023 02:50:15 +0530 Subject: [PATCH 13/13] Added changes to integrate cpu AC to ResourceUsageCollector and Emit Stats Signed-off-by: Ajay Kumar Movva --- CHANGELOG.md | 1 + .../admin/cluster/node/stats/NodeStats.java | 21 ++++- .../cluster/node/stats/NodesStatsRequest.java | 3 +- .../node/stats/TransportNodesStatsAction.java | 3 +- .../action/search/SearchTransportService.java | 11 +++ .../TransportReplicationAction.java | 30 ++++++-- .../common/network/NetworkModule.java | 15 ++++ .../main/java/org/opensearch/node/Node.java | 29 +++---- .../java/org/opensearch/node/NodeService.java | 12 ++- .../AdmissionControlService.java | 52 ++++++++++--- .../controllers/AdmissionController.java | 53 ++++++++++--- .../CPUBasedAdmissionController.java | 55 +++++++++++--- ...e.java => AdmissionControlActionType.java} | 6 +- .../stats/AdmissionControlStats.java | 76 +++++++++++++++++++ .../stats/BaseAdmissionControllerStats.java | 15 ++++ .../CPUBasedAdmissionControllerStats.java | 76 +++++++++++++++++++ .../AdmissionControlTransportHandler.java | 8 +- .../AdmissionControlTransportInterceptor.java | 6 +- .../transport/TransportInterceptor.java | 21 +++++ .../transport/TransportService.java | 34 +++++++++ .../AdmissionControlServiceTests.java | 23 +++--- .../CPUBasedAdmissionControllerTests.java | 29 ++++--- .../enums/TransportActionTypeTests.java | 10 +-- ...AdmissionControlTransportHandlerTests.java | 13 ++-- .../MockInternalClusterInfoService.java | 3 +- 25 files changed, 505 insertions(+), 100 deletions(-) rename server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/{TransportActionType.java => AdmissionControlActionType.java} (85%) create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStats.java create mode 100644 
server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/BaseAdmissionControllerStats.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/CPUBasedAdmissionControllerStats.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 374dd4ab57ee6..76bf757083d15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) - [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) - [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) +- [AdmissionControl] Added changes to integrate cpu AC to ResourceUsageCollector and Emit Stats ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 3d37056956c69..1598fbaf3711a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -58,6 +58,7 @@ import org.opensearch.monitor.process.ProcessStats; import org.opensearch.node.AdaptiveSelectionStats; import org.opensearch.node.NodesResourceUsageStats; +import org.opensearch.ratelimitting.admissioncontrol.stats.AdmissionControlStats; import org.opensearch.repositories.RepositoriesStats; import org.opensearch.script.ScriptCacheStats; import org.opensearch.script.ScriptStats; @@ -154,6 +155,9 @@ public class NodeStats extends 
BaseNodeResponse implements ToXContentFragment { @Nullable private RepositoriesStats repositoriesStats; + @Nullable + private AdmissionControlStats admissionControlStats; + public NodeStats(StreamInput in) throws IOException { super(in); timestamp = in.readVLong(); @@ -226,6 +230,11 @@ public NodeStats(StreamInput in) throws IOException { } else { repositoriesStats = null; } + if(in.getVersion().onOrAfter(Version.V_3_0_0)) { + admissionControlStats = in.readOptionalWriteable(AdmissionControlStats::new); + } else { + admissionControlStats = null; + } } public NodeStats( @@ -255,7 +264,8 @@ public NodeStats( @Nullable TaskCancellationStats taskCancellationStats, @Nullable SearchPipelineStats searchPipelineStats, @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, - @Nullable RepositoriesStats repositoriesStats + @Nullable RepositoriesStats repositoriesStats, + @Nullable AdmissionControlStats admissionControlStats ) { super(node); this.timestamp = timestamp; @@ -284,6 +294,7 @@ public NodeStats( this.searchPipelineStats = searchPipelineStats; this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; + this.admissionControlStats = admissionControlStats; } public long getTimestamp() { @@ -435,6 +446,11 @@ public RepositoriesStats getRepositoriesStats() { return repositoriesStats; } + @Nullable + public AdmissionControlStats getAdmissionControlStats() { + return admissionControlStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -588,6 +604,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } + if (getAdmissionControlStats() != null) { + getAdmissionControlStats().toXContent(builder, params); + } return builder; } } diff --git 
a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index fc72668d36413..95c96ffb20757 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -216,7 +216,8 @@ public enum Metric { SEARCH_PIPELINE("search_pipeline"), RESOURCE_USAGE_STATS("resource_usage_stats"), SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), - REPOSITORIES("repositories"); + REPOSITORIES("repositories"), + ADMISSION_CONTROL("admission_control"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 99cf42cfdc4d0..1df73d3b4394d 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -127,7 +127,8 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), - NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) + NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics), + NodesStatsRequest.Metric.ADMISSION_CONTROL.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchTransportService.java b/server/src/main/java/org/opensearch/action/search/SearchTransportService.java index a723937afd2ed..64c738f633f2e 100644 --- 
a/server/src/main/java/org/opensearch/action/search/SearchTransportService.java +++ b/server/src/main/java/org/opensearch/action/search/SearchTransportService.java @@ -45,6 +45,7 @@ import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.transport.TransportResponse; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchService; import org.opensearch.search.dfs.DfsSearchResult; @@ -542,6 +543,9 @@ public static void registerRequestHandler(TransportService transportService, Sea transportService.registerRequestHandler( DFS_ACTION_NAME, ThreadPool.Names.SAME, + false, + true, + AdmissionControlActionType.SEARCH, ShardSearchRequest::new, (request, channel, task) -> searchService.executeDfsPhase( request, @@ -556,6 +560,9 @@ public static void registerRequestHandler(TransportService transportService, Sea transportService.registerRequestHandler( QUERY_ACTION_NAME, ThreadPool.Names.SAME, + false, + true, + AdmissionControlActionType.SEARCH, ShardSearchRequest::new, (request, channel, task) -> { searchService.executeQueryPhase( @@ -575,6 +582,9 @@ public static void registerRequestHandler(TransportService transportService, Sea transportService.registerRequestHandler( QUERY_ID_ACTION_NAME, ThreadPool.Names.SAME, + false, + true, + AdmissionControlActionType.SEARCH, QuerySearchRequest::new, (request, channel, task) -> { searchService.executeQueryPhase( @@ -633,6 +643,7 @@ public static void registerRequestHandler(TransportService transportService, Sea ThreadPool.Names.SAME, true, true, + AdmissionControlActionType.SEARCH, ShardFetchSearchRequest::new, (request, channel, task) -> { searchService.executeFetchPhase( diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java 
b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java index ddebdc5530e70..7dd34fff1b159 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java @@ -38,6 +38,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListenerResponseHandler; import org.opensearch.action.UnavailableShardsException; +import org.opensearch.action.bulk.TransportShardBulkAction; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.ActiveShardCount; import org.opensearch.action.support.ChannelActionListener; @@ -82,6 +83,7 @@ import org.opensearch.indices.IndexClosedException; import org.opensearch.indices.IndicesService; import org.opensearch.node.NodeClosedException; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.tasks.Task; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.ConnectTransportException; @@ -219,14 +221,26 @@ protected TransportReplicationAction( transportService.registerRequestHandler(actionName, ThreadPool.Names.SAME, requestReader, this::handleOperationRequest); - transportService.registerRequestHandler( - transportPrimaryAction, - executor, - forceExecutionOnPrimary, - true, - in -> new ConcreteShardRequest<>(requestReader, in), - this::handlePrimaryRequest - ); + if(transportPrimaryAction.equals(TransportShardBulkAction.ACTION_NAME + PRIMARY_ACTION_SUFFIX)){ + transportService.registerRequestHandler( + transportPrimaryAction, + executor, + forceExecutionOnPrimary, + true, + AdmissionControlActionType.INDEXING, + in -> new ConcreteShardRequest<>(requestReader, in), + this::handlePrimaryRequest + ); + } else { + transportService.registerRequestHandler( + transportPrimaryAction, + executor, + forceExecutionOnPrimary, + true, + in 
-> new ConcreteShardRequest<>(requestReader, in), + this::handlePrimaryRequest + ); + } // we must never reject on because of thread pool capacity on replicas transportService.registerRequestHandler( diff --git a/server/src/main/java/org/opensearch/common/network/NetworkModule.java b/server/src/main/java/org/opensearch/common/network/NetworkModule.java index 821d48fccf48c..7fa8ec771b488 100644 --- a/server/src/main/java/org/opensearch/common/network/NetworkModule.java +++ b/server/src/main/java/org/opensearch/common/network/NetworkModule.java @@ -55,6 +55,7 @@ import org.opensearch.http.HttpServerTransport; import org.opensearch.index.shard.PrimaryReplicaSyncer.ResyncTask; import org.opensearch.plugins.NetworkPlugin; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.tasks.RawTaskStatus; import org.opensearch.tasks.Task; import org.opensearch.telemetry.tracing.Tracer; @@ -299,6 +300,20 @@ public TransportRequestHandler interceptHandler( return actualHandler; } + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler, + AdmissionControlActionType transportActionType + ) { + for (TransportInterceptor interceptor : this.transportInterceptors) { + actualHandler = interceptor.interceptHandler(action, executor, forceExecution, actualHandler, transportActionType); + } + return actualHandler; + } + @Override public AsyncSender interceptSender(AsyncSender sender) { for (TransportInterceptor interceptor : this.transportInterceptors) { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index e80b768074fc7..3fb75089b9865 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -894,12 +894,24 @@ protected Node( final RestController restController = actionModule.getRestController(); 
- final AdmissionControlService admissionControlService = new AdmissionControlService( + final NodeResourceUsageTracker nodeResourceUsageTracker = new NodeResourceUsageTracker( + threadPool, settings, - clusterService.getClusterSettings(), + clusterService.getClusterSettings() + ); + final ResourceUsageCollectorService resourceUsageCollectorService = new ResourceUsageCollectorService( + nodeResourceUsageTracker, + clusterService, threadPool ); + final AdmissionControlService admissionControlService = new AdmissionControlService( + settings, + clusterService, + threadPool, + resourceUsageCollectorService + ); + AdmissionControlTransportInterceptor admissionControlTransportInterceptor = new AdmissionControlTransportInterceptor( admissionControlService ); @@ -1101,16 +1113,6 @@ protected Node( transportService.getTaskManager(), taskCancellationMonitoringSettings ); - final NodeResourceUsageTracker nodeResourceUsageTracker = new NodeResourceUsageTracker( - threadPool, - settings, - clusterService.getClusterSettings() - ); - final ResourceUsageCollectorService resourceUsageCollectorService = new ResourceUsageCollectorService( - nodeResourceUsageTracker, - clusterService, - threadPool - ); this.nodeService = new NodeService( settings, threadPool, @@ -1135,7 +1137,8 @@ protected Node( taskCancellationMonitoringService, resourceUsageCollectorService, segmentReplicationStatsTracker, - repositoryService + repositoryService, + admissionControlService ); final SearchService searchService = newSearchService( diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index 49dde0b81cac7..3c6dd15834f57 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -54,6 +54,7 @@ import org.opensearch.ingest.IngestService; import org.opensearch.monitor.MonitorService; import org.opensearch.plugins.PluginsService; +import 
org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.AggregationUsageService; @@ -96,6 +97,7 @@ public class NodeService implements Closeable { private final FileCache fileCache; private final TaskCancellationMonitoringService taskCancellationMonitoringService; private final RepositoriesService repositoriesService; + AdmissionControlService admissionControlService; private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; @@ -123,7 +125,8 @@ public class NodeService implements Closeable { TaskCancellationMonitoringService taskCancellationMonitoringService, ResourceUsageCollectorService resourceUsageCollectorService, SegmentReplicationStatsTracker segmentReplicationStatsTracker, - RepositoriesService repositoriesService + RepositoriesService repositoriesService, + AdmissionControlService admissionControlService ) { this.settings = settings; this.threadPool = threadPool; @@ -148,6 +151,7 @@ public class NodeService implements Closeable { this.taskCancellationMonitoringService = taskCancellationMonitoringService; this.resourceUsageCollectorService = resourceUsageCollectorService; this.repositoriesService = repositoriesService; + this.admissionControlService = admissionControlService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; @@ -232,7 +236,8 @@ public NodeStats stats( boolean searchPipelineStats, boolean resourceUsageStats, boolean segmentReplicationTrackerStats, - boolean repositoriesStats + boolean repositoriesStats, + boolean admissionControl ) { // for indices stats we want to include previous allocated shards stats as well (it will // only be applied to the sensible ones to use, like refresh/merge/flush/indexing stats) @@ -263,7 +268,8 @@ public 
NodeStats stats( taskCancellation ? this.taskCancellationMonitoringService.stats() : null, searchPipelineStats ? this.searchPipelineService.stats() : null, segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, - repositoriesStats ? this.repositoriesService.getRepositoriesStats() : null + repositoriesStats ? this.repositoriesService.getRepositoriesStats() : null, + admissionControl ? this.admissionControlService.stats(): null ); } diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java index 2cc409b0e4465..b71b062dc788d 100644 --- a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java @@ -10,10 +10,16 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.node.ResourceUsageCollectorService; import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; +import org.opensearch.ratelimitting.admissioncontrol.stats.AdmissionControlStats; +import org.opensearch.ratelimitting.admissioncontrol.stats.BaseAdmissionControllerStats; +import org.opensearch.ratelimitting.admissioncontrol.stats.CPUBasedAdmissionControllerStats; import org.opensearch.threadpool.ThreadPool; import java.util.ArrayList; @@ -31,21 +37,24 @@ public class AdmissionControlService { public final AdmissionControlSettings admissionControlSettings; private 
final ConcurrentMap ADMISSION_CONTROLLERS; private static final Logger logger = LogManager.getLogger(AdmissionControlService.class); - private final ClusterSettings clusterSettings; + private final ClusterService clusterService; private final Settings settings; + private ResourceUsageCollectorService resourceUsageCollectorService; + /** * * @param settings Immutable settings instance - * @param clusterSettings ClusterSettings Instance + * @param clusterService ClusterService Instance * @param threadPool ThreadPool Instance */ - public AdmissionControlService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + public AdmissionControlService(Settings settings, ClusterService clusterService, ThreadPool threadPool, ResourceUsageCollectorService resourceUsageCollectorService) { this.threadPool = threadPool; - this.admissionControlSettings = new AdmissionControlSettings(clusterSettings, settings); + this.admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings); this.ADMISSION_CONTROLLERS = new ConcurrentHashMap<>(); - this.clusterSettings = clusterSettings; + this.clusterService = clusterService; this.settings = settings; + this.resourceUsageCollectorService = resourceUsageCollectorService; this.initialise(); } @@ -58,10 +67,12 @@ private void initialise() { } /** - * Handler to trigger registered admissionController + * + * @param action transport action that is being executed. 
It is used for logging when the request is rejected + * @param admissionControlActionType the kind of action (search or indexing) that is handed to every registered admission controller */ - public void applyTransportAdmissionControl(String action) { - this.ADMISSION_CONTROLLERS.forEach((name, admissionController) -> { admissionController.apply(action); }); + public void applyTransportAdmissionControl(String action, AdmissionControlActionType admissionControlActionType) { + this.ADMISSION_CONTROLLERS.forEach((name, admissionController) -> { admissionController.apply(action, admissionControlActionType); }); } /** @@ -79,7 +90,7 @@ public void registerAdmissionController(String admissionControllerName) { private AdmissionController controllerFactory(String admissionControllerName) { switch (admissionControllerName) { case CPU_BASED_ADMISSION_CONTROLLER: - return new CPUBasedAdmissionController(admissionControllerName, this.settings, this.clusterSettings); + return new CPUBasedAdmissionController(admissionControllerName, this.settings, this.clusterService, this.resourceUsageCollectorService); default: throw new IllegalArgumentException("Not Supported AdmissionController : " + admissionControllerName); } @@ -101,4 +112,27 @@ public List getAdmissionControllers() { public AdmissionController getAdmissionController(String controllerName) { return this.ADMISSION_CONTROLLERS.getOrDefault(controllerName, null); } + + public AdmissionControlStats stats(){ + List statsList = new ArrayList<>(); + if(this.ADMISSION_CONTROLLERS.size() > 0){ + this.ADMISSION_CONTROLLERS.forEach((controllerName, admissionController) -> { + BaseAdmissionControllerStats admissionControllerStats = controllerStatsFactory(admissionController); + if(admissionControllerStats != null) { + statsList.add(admissionControllerStats); + } + }); + return new AdmissionControlStats(statsList); + } + return null; + } + + private BaseAdmissionControllerStats controllerStatsFactory(AdmissionController admissionController) { + switch (admissionController.getName()) { +
case CPU_BASED_ADMISSION_CONTROLLER: + return new CPUBasedAdmissionControllerStats(admissionController); + default: + return null; + } + } } diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java index 00564a9967f31..794a70f7a7483 100644 --- a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java @@ -8,8 +8,15 @@ package org.opensearch.ratelimitting.admissioncontrol.controllers; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.node.ResourceUsageCollectorService; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicLong; /** @@ -21,15 +28,21 @@ public abstract class AdmissionController { private final AtomicLong rejectionCount; private final String admissionControllerName; + final ResourceUsageCollectorService resourceUsageCollectorService; + public final Map rejectionCountMap; + public final ClusterService clusterService; /** - * - * @param rejectionCount initialised rejectionCount value for AdmissionController + * @param rejectionCount initialised rejectionCount value for AdmissionController * @param admissionControllerName name of the admissionController + * @param clusterService */ - public AdmissionController(AtomicLong rejectionCount, String admissionControllerName) { + public AdmissionController(AtomicLong rejectionCount, String admissionControllerName, 
ResourceUsageCollectorService resourceUsageCollectorService, ClusterService clusterService) { this.rejectionCount = rejectionCount; this.admissionControllerName = admissionControllerName; + this.resourceUsageCollectorService = resourceUsageCollectorService; + this.clusterService = clusterService; + this.rejectionCountMap = ConcurrentCollections.newConcurrentMap(); } /** @@ -40,11 +53,19 @@ public boolean isEnabledForTransportLayer(AdmissionControlMode admissionControlM return admissionControlMode != AdmissionControlMode.DISABLED; } + /** + * + * @return true if admissionController is Enforced Mode else false + */ + public Boolean isAdmissionControllerEnforced(AdmissionControlMode admissionControlMode) { + return admissionControlMode == AdmissionControlMode.ENFORCED; + } + /** * Increment the tracking-objects and apply the admission control if threshold is breached. * Mostly applicable while applying admission controller */ - public abstract void apply(String action); + public abstract void apply(String action, AdmissionControlActionType admissionControlActionType); /** * @return name of the admission-controller @@ -53,18 +74,26 @@ public String getName() { return this.admissionControllerName; } - /** - * Adds the rejection count for the controller. Primarily used when copying controller states. - * @param count To add the value of the tracking resource object as the provided count - */ - public void addRejectionCount(long count) { - this.rejectionCount.addAndGet(count); + public void addRejectionCount(String admissionControlActionType, long count) { + AtomicLong updatedCount = new AtomicLong(0); + if(this.rejectionCountMap.containsKey(admissionControlActionType)){ + updatedCount.addAndGet(this.rejectionCountMap.get(admissionControlActionType).get()); + } + updatedCount.addAndGet(count); + this.rejectionCountMap.put(admissionControlActionType, updatedCount); } /** * @return current value of the rejection count metric tracked by the admission-controller. 
*/ - public long getRejectionCount() { - return this.rejectionCount.get(); + public long getRejectionCount(String admissionControlActionType) { + AtomicLong rejectionCount = this.rejectionCountMap.getOrDefault(admissionControlActionType, new AtomicLong()); + return rejectionCount.get(); + } + + public Map getRejectionStats() { + Map rejectionStats = new HashMap<>(); + rejectionCountMap.forEach((actionType, count) -> rejectionStats.put(actionType, count.get())); + return rejectionStats; } } diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java index 3a8956b2cce87..2514b1e83fd04 100644 --- a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java @@ -10,10 +10,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.node.NodeResourceUsageStats; +import org.opensearch.node.ResourceUsageCollectorService; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; /** @@ -28,9 +36,9 @@ public class CPUBasedAdmissionController extends AdmissionController { * * @param admissionControllerName State of the 
admission controller */ - public CPUBasedAdmissionController(String admissionControllerName, Settings settings, ClusterSettings clusterSettings) { - super(new AtomicLong(0), admissionControllerName); - this.settings = new CPUBasedAdmissionControllerSettings(clusterSettings, settings); + public CPUBasedAdmissionController(String admissionControllerName, Settings settings, ClusterService clusterService, ResourceUsageCollectorService resourceUsageCollectorService) { + super(new AtomicLong(0), admissionControllerName, resourceUsageCollectorService, clusterService); + this.settings = new CPUBasedAdmissionControllerSettings(clusterService.getClusterSettings(), settings); } /** @@ -38,18 +46,43 @@ public CPUBasedAdmissionController(String admissionControllerName, Settings sett * @param action is the transport action */ @Override - public void apply(String action) { + public void apply(String action, AdmissionControlActionType admissionControlActionType) { // TODO Will extend this logic further currently just incrementing rejectionCount if (this.isEnabledForTransportLayer(this.settings.getTransportLayerAdmissionControllerMode())) { - this.applyForTransportLayer(action); + this.applyForTransportLayer(action, admissionControlActionType); } } - private void applyForTransportLayer(String actionName) { - // currently incrementing counts to evaluate the controller triggering as expected and using in testing so limiting to 10 - // TODO will update rejection logic further in next PR's - if (this.getRejectionCount() < 10) { - this.addRejectionCount(1); + private void applyForTransportLayer(String actionName, AdmissionControlActionType admissionControlActionType) { + if (isLimitsBreached(admissionControlActionType)) { + this.addRejectionCount(admissionControlActionType.getType(), 1); + if (this.isAdmissionControllerEnforced(this.settings.getTransportLayerAdmissionControllerMode())) { + throw new OpenSearchRejectedExecutionException("Action ["+ actionName +"] was rejected due to CPU 
usage admission controller limit breached"); + } + } + } + + private boolean isLimitsBreached(AdmissionControlActionType transportActionType) { + long maxCpuLimit = this.getCpuRejectionThreshold(transportActionType); + Optional nodePerformanceStatistics = this.resourceUsageCollectorService.getNodeStatistics(this.clusterService.state().nodes().getLocalNodeId()); + if(nodePerformanceStatistics.isPresent()) { + double cpuUsage = nodePerformanceStatistics.get().getCpuUtilizationPercent(); + if (cpuUsage >= maxCpuLimit){ + LOGGER.warn("CpuBasedAdmissionController rejected the request as the current CPU usage [" + + cpuUsage + "%] exceeds the allowed limit [" + maxCpuLimit + "%]"); + return true; + } + } + return false; + } + private long getCpuRejectionThreshold(AdmissionControlActionType transportActionType) { + switch (transportActionType) { + case SEARCH: + return this.settings.getSearchCPULimit(); + case INDEXING: + return this.settings.getIndexingCPULimit(); + default: + throw new IllegalArgumentException("Not Supported TransportAction Type: " + transportActionType.getType()); } } } diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlActionType.java similarity index 85% rename from server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java rename to server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlActionType.java index f2fdca0cfe49b..8cf6e973ceb64 100644 --- a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlActionType.java @@ -13,13 +13,13 @@ /** * Enums that defines the type of the transport requests */ -public enum TransportActionType { +public enum AdmissionControlActionType { 
INDEXING("indexing"), SEARCH("search"); private final String type; - TransportActionType(String uriType) { + AdmissionControlActionType(String uriType) { this.type = uriType; } @@ -31,7 +31,7 @@ public String getType() { return type; } - public static TransportActionType fromName(String name) { + public static AdmissionControlActionType fromName(String name) { name = name.toLowerCase(Locale.ROOT); switch (name) { case "indexing": diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStats.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStats.java new file mode 100644 index 0000000000000..188feb77318e4 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStats.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.stats; + +import org.opensearch.Version; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +public class AdmissionControlStats implements ToXContentFragment, Writeable { + + List admissionControllerStatsList; + + /** + * + * @param admissionControllerStatsList list of admissionControllerStats + */ + public AdmissionControlStats(List admissionControllerStatsList){ + this.admissionControllerStatsList = admissionControllerStatsList; + } + + /** + * + * @param in the stream to read from + * @throws IOException if an I/O error occurs + */ + public AdmissionControlStats(StreamInput in) throws IOException { + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.admissionControllerStatsList = in.readNamedWriteableList(BaseAdmissionControllerStats.class); + } else { + this.admissionControllerStatsList = null; + } + } + + /** + * Write this into the {@linkplain StreamOutput}. 
+ * + * @param out the output stream to write entity content to + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeList(this.admissionControllerStatsList); + } + } + + /** + * @param builder + * @param params + * @return + * @throws IOException + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("admission_control"); + this.admissionControllerStatsList.forEach(stats -> { + try { + builder.field(stats.getWriteableName(), stats); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + return builder.endObject(); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/BaseAdmissionControllerStats.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/BaseAdmissionControllerStats.java new file mode 100644 index 0000000000000..0ee1807bf80da --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/BaseAdmissionControllerStats.java @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.stats; + +import org.opensearch.core.common.io.stream.NamedWriteable; +import org.opensearch.core.xcontent.ToXContentFragment; + +public abstract class BaseAdmissionControllerStats implements NamedWriteable, ToXContentFragment { +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/CPUBasedAdmissionControllerStats.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/CPUBasedAdmissionControllerStats.java new file mode 100644 index 0000000000000..7b4e4a9695509 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/stats/CPUBasedAdmissionControllerStats.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.stats; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; + +import java.io.IOException; +import java.util.Map; + +import static org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER; +public class CPUBasedAdmissionControllerStats extends BaseAdmissionControllerStats { + + /** + * Returns the name of the writeable object + */ + @Override + public String getWriteableName() { + return CPU_BASED_ADMISSION_CONTROLLER; + } + + public Map rejectionCount; + + public CPUBasedAdmissionControllerStats(AdmissionController admissionController){ + this.rejectionCount = admissionController.getRejectionStats(); + } + + public 
CPUBasedAdmissionControllerStats(StreamInput in) throws IOException { + this.rejectionCount = in.readMap(StreamInput::readString, StreamInput::readLong); + } + /** + * Write this into the {@linkplain StreamOutput}. + * + * @param out + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(this.rejectionCount, StreamOutput::writeString, StreamOutput::writeLong); + } + + /** + * @param builder + * @param params + * @return + * @throws IOException + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject("transport"); + { + builder.startObject("rejection_count"); + { + this.rejectionCount.forEach((actionType, count) -> { + try { + builder.field(actionType, count); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + builder.endObject(); + } + builder.endObject(); + return builder.endObject(); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java index 7d0f5fbc17a51..dfe286d9b9537 100644 --- a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.tasks.Task; import org.opensearch.transport.TransportChannel; import org.opensearch.transport.TransportRequest; @@ -28,18 +29,21 @@ public class 
AdmissionControlTransportHandler implem protected final Logger log = LogManager.getLogger(this.getClass()); AdmissionControlService admissionControlService; boolean forceExecution; + AdmissionControlActionType admissionControlActionType; public AdmissionControlTransportHandler( String action, TransportRequestHandler actualHandler, AdmissionControlService admissionControlService, - boolean forceExecution + boolean forceExecution, + AdmissionControlActionType admissionControlActionType ) { super(); this.action = action; this.actualHandler = actualHandler; this.admissionControlService = admissionControlService; this.forceExecution = forceExecution; + this.admissionControlActionType = admissionControlActionType; } /** @@ -53,7 +57,7 @@ public void messageReceived(T request, TransportChannel channel, Task task) thro // intercept all the transport requests here and apply admission control try { // TODO Need to evaluate if we need to apply admission control or not if force Execution is true will update in next PR. 
- this.admissionControlService.applyTransportAdmissionControl(this.action); + this.admissionControlService.applyTransportAdmissionControl(this.action, this.admissionControlActionType); } catch (final OpenSearchRejectedExecutionException openSearchRejectedExecutionException) { log.warn(openSearchRejectedExecutionException.getMessage()); channel.sendResponse(openSearchRejectedExecutionException); diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java index 01cfcbd780006..c725af821ac8f 100644 --- a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java @@ -9,6 +9,7 @@ package org.opensearch.ratelimitting.admissioncontrol.transport; import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.transport.TransportInterceptor; import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; @@ -33,8 +34,9 @@ public TransportRequestHandler interceptHandler( String action, String executor, boolean forceExecution, - TransportRequestHandler actualHandler + TransportRequestHandler actualHandler, + AdmissionControlActionType admissionControlActionType ) { - return new AdmissionControlTransportHandler<>(action, actualHandler, this.admissionControlService, forceExecution); + return new AdmissionControlTransportHandler<>(action, actualHandler, this.admissionControlService, forceExecution, admissionControlActionType); } } diff --git a/server/src/main/java/org/opensearch/transport/TransportInterceptor.java 
b/server/src/main/java/org/opensearch/transport/TransportInterceptor.java index 9ee2db6d39893..12b0990a5d692 100644 --- a/server/src/main/java/org/opensearch/transport/TransportInterceptor.java +++ b/server/src/main/java/org/opensearch/transport/TransportInterceptor.java @@ -35,6 +35,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.core.common.io.stream.Writeable.Reader; import org.opensearch.core.transport.TransportResponse; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; /** * This interface allows plugins to intercept requests on both the sender and the receiver side. @@ -57,6 +58,26 @@ default TransportRequestHandler interceptHandler return actualHandler; } + /** + * Overload of {@code interceptHandler} that also receives the admission control action type; this default implementation ignores that type and delegates to the four-argument overload. + * @param action the action name, forwarded unchanged to the four-argument overload + * @param executor the executor name, forwarded unchanged to the four-argument overload + * @param forceExecution the force-execution flag, forwarded unchanged to the four-argument overload + * @param actualHandler the handler being intercepted, forwarded unchanged to the four-argument overload + * @param transportActionType the admission control action type of the request; not used by this default implementation + * @return the handler returned by the four-argument overload + * @param + */ + default TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler, + AdmissionControlActionType transportActionType + ) { + return interceptHandler(action, executor, forceExecution, actualHandler); + } + /** * This is called up-front providing the actual low level {@link AsyncSender} that performs the low level send request.
* The returned sender is used to send all requests that come in via diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index de88c3619abe8..a55a20478aa3d 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -64,6 +64,7 @@ import org.opensearch.core.service.ReportingService; import org.opensearch.core.transport.TransportResponse; import org.opensearch.node.NodeClosedException; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskManager; import org.opensearch.telemetry.tracing.Span; @@ -1241,6 +1242,39 @@ public void registerRequestHandler( transport.registerRequestHandler(reg); } + /** + * Registers a new request handler + * + * @param action The action the request handler is associated with + * @param requestReader The request class that will be used to construct new instances for streaming + * @param executor The executor the request handling will be executed on + * @param forceExecution Force execution on the executor queue and never reject it + * @param transportActionType The admission control action type of the request; it is passed to the interceptor that wraps the handler. (The previous text here, "check the request size and raise an exception in case the limit is breached", appears to describe {@code canTripCircuitBreaker} instead.)
+ * @param handler The handler itself that implements the request handling + */ + public void registerRequestHandler( + String action, + String executor, + boolean forceExecution, + boolean canTripCircuitBreaker, + AdmissionControlActionType transportActionType, + Writeable.Reader requestReader, + TransportRequestHandler handler + ) { + validateActionName(action); + handler = interceptor.interceptHandler(action, executor, forceExecution, handler, transportActionType); + RequestHandlerRegistry reg = new RequestHandlerRegistry<>( + action, + requestReader, + taskManager, + handler, + executor, + forceExecution, + canTripCircuitBreaker + ); + transport.registerRequestHandler(reg); + } + /** * called by the {@link Transport} implementation when an incoming request arrives but before * any parsing of it has happened (with the exception of the requestId and action) diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java index bac4eaf3fd677..abd38a3cbf1fb 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java @@ -13,6 +13,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; import org.opensearch.test.OpenSearchTestCase; @@ -46,13 +47,13 @@ public void tearDown() throws Exception { } public void 
testWhenAdmissionControllerRegistered() { - admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService, threadPool, null); assertEquals(admissionControlService.getAdmissionControllers().size(), 1); } public void testRegisterInvalidAdmissionController() { String test = "TEST"; - admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService, threadPool, null); assertEquals(admissionControlService.getAdmissionControllers().size(), 1); IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, @@ -62,7 +63,7 @@ public void testRegisterInvalidAdmissionController() { } public void testAdmissionControllerSettings() { - admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService, threadPool, null); AdmissionControlSettings admissionControlSettings = admissionControlService.admissionControlSettings; List admissionControllerList = admissionControlService.getAdmissionControllers(); assertEquals(admissionControllerList.size(), 1); @@ -105,19 +106,19 @@ public void testAdmissionControllerSettings() { public void testApplyAdmissionControllerDisabled() { this.action = "indices:data/write/bulk[s][p]"; - admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); - admissionControlService.applyTransportAdmissionControl(this.action); + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService, threadPool, null); + admissionControlService.applyTransportAdmissionControl(this.action, null); List admissionControllerList = 
admissionControlService.getAdmissionControllers(); - admissionControllerList.forEach(admissionController -> { assertEquals(admissionController.getRejectionCount(), 0); }); + admissionControllerList.forEach(admissionController -> { assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0); }); } public void testApplyAdmissionControllerEnabled() { this.action = "indices:data/write/bulk[s][p]"; - admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); - admissionControlService.applyTransportAdmissionControl(this.action); + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService, threadPool,null); + admissionControlService.applyTransportAdmissionControl(this.action, null); assertEquals( admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) - .getRejectionCount(), + .getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0 ); @@ -128,12 +129,12 @@ public void testApplyAdmissionControllerEnabled() { ) .build(); clusterService.getClusterSettings().applySettings(settings); - admissionControlService.applyTransportAdmissionControl(this.action); + admissionControlService.applyTransportAdmissionControl(this.action, null); List admissionControllerList = admissionControlService.getAdmissionControllers(); assertEquals(admissionControllerList.size(), 1); assertEquals( admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) - .getRejectionCount(), + .getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 1 ); } diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java index af6ec0749e709..2473b242f71b5 100644 --- 
a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java @@ -11,6 +11,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; import org.opensearch.test.OpenSearchTestCase; @@ -45,10 +46,11 @@ public void testCheckDefaultParameters() { admissionController = new CPUBasedAdmissionController( CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, Settings.EMPTY, - clusterService.getClusterSettings() + clusterService, + null ); assertEquals(admissionController.getName(), CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); - assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0); assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); assertFalse( admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode()) @@ -59,7 +61,8 @@ public void testCheckUpdateSettings() { admissionController = new CPUBasedAdmissionController( CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, Settings.EMPTY, - clusterService.getClusterSettings() + clusterService, + null ); Settings settings = Settings.builder() .put( @@ -70,7 +73,7 @@ public void testCheckUpdateSettings() { clusterService.getClusterSettings().applySettings(settings); assertEquals(admissionController.getName(), 
CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); - assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0); assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.ENFORCED); assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())); } @@ -79,13 +82,14 @@ public void testApplyControllerWithDefaultSettings() { admissionController = new CPUBasedAdmissionController( CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, Settings.EMPTY, - clusterService.getClusterSettings() + clusterService, + null ); - assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0); assertEquals(admissionController.settings.getTransportLayerAdmissionControllerMode(), AdmissionControlMode.DISABLED); action = "indices:data/write/bulk[s][p]"; - admissionController.apply(action); - assertEquals(admissionController.getRejectionCount(), 0); + admissionController.apply(action, AdmissionControlActionType.INDEXING); + assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0); } public void testApplyControllerWhenSettingsEnabled() { @@ -98,12 +102,13 @@ public void testApplyControllerWhenSettingsEnabled() { admissionController = new CPUBasedAdmissionController( CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, settings, - clusterService.getClusterSettings() + clusterService, + null ); assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())); - assertEquals(admissionController.getRejectionCount(), 0); + assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 0); 
action = "indices:data/write/bulk[s][p]"; - admissionController.apply(action); - assertEquals(admissionController.getRejectionCount(), 1); + admissionController.apply(action, AdmissionControlActionType.INDEXING); + assertEquals(admissionController.getRejectionCount(AdmissionControlActionType.INDEXING.getType()), 1); } } diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java index 02f582c26f54e..419e9ea8d4827 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java @@ -13,15 +13,15 @@ public class TransportActionTypeTests extends OpenSearchTestCase { public void testValidActionType() { - assertEquals(TransportActionType.SEARCH.getType(), "search"); - assertEquals(TransportActionType.INDEXING.getType(), "indexing"); - assertEquals(TransportActionType.fromName("search"), TransportActionType.SEARCH); - assertEquals(TransportActionType.fromName("indexing"), TransportActionType.INDEXING); + assertEquals(AdmissionControlActionType.SEARCH.getType(), "search"); + assertEquals(AdmissionControlActionType.INDEXING.getType(), "indexing"); + assertEquals(AdmissionControlActionType.fromName("search"), AdmissionControlActionType.SEARCH); + assertEquals(AdmissionControlActionType.fromName("indexing"), AdmissionControlActionType.INDEXING); } public void testInValidActionType() { String name = "test"; - IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> TransportActionType.fromName(name)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> AdmissionControlActionType.fromName(name)); assertEquals(ex.getMessage(), "Not Supported TransportAction Type: " + name); } } diff --git 
a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java index 03d4819a94045..057cf35a12f6b 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java @@ -29,7 +29,8 @@ public void testHandlerInvoked() throws Exception { action, handler, mock(AdmissionControlService.class), - false + false, + null ); admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); assertEquals(1, handler.count); @@ -38,13 +39,14 @@ public void testHandlerInvoked() throws Exception { public void testHandlerInvokedRejectedException() throws Exception { String action = "TEST"; AdmissionControlService admissionControlService = mock(AdmissionControlService.class); - doThrow(new OpenSearchRejectedExecutionException()).when(admissionControlService).applyTransportAdmissionControl(action); + doThrow(new OpenSearchRejectedExecutionException()).when(admissionControlService).applyTransportAdmissionControl(action, null); InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); admissionControlTransportHandler = new AdmissionControlTransportHandler( action, handler, admissionControlService, - false + false, + null ); try { admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); @@ -58,13 +60,14 @@ public void testHandlerInvokedRejectedException() throws Exception { public void testHandlerInvokedRandomException() throws Exception { String action = "TEST"; AdmissionControlService admissionControlService = mock(AdmissionControlService.class); - doThrow(new 
NullPointerException()).when(admissionControlService).applyTransportAdmissionControl(action); + doThrow(new NullPointerException()).when(admissionControlService).applyTransportAdmissionControl(action, null); InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); admissionControlTransportHandler = new AdmissionControlTransportHandler( action, handler, admissionControlService, - false + false, + null ); try { admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 2ba4de5e54a67..1ad6083074025 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -123,7 +123,8 @@ List adjustNodesStats(List nodesStats) { nodeStats.getTaskCancellationStats(), nodeStats.getSearchPipelineStats(), nodeStats.getSegmentReplicationRejectionStats(), - nodeStats.getRepositoriesStats() + nodeStats.getRepositoriesStats(), + nodeStats.getAdmissionControlStats() ); }).collect(Collectors.toList()); }