From e87bfddc0c43ffe1d2fb99fdfc4c9fd6ab51560e Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Thu, 27 Jul 2023 14:07:05 +0530 Subject: [PATCH 01/10] Add PrimaryShardBatchAllocator to take allocation decisions for a batch of shards Signed-off-by: Aman Khare --- .../gateway/PrimaryShardBatchAllocator.java | 562 ++++++++++++++++++ 1 file changed, 562 insertions(+) create mode 100644 server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java new file mode 100644 index 0000000000000..36c81100793cf --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -0,0 +1,562 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.gateway; + +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.UnassignedInfo.AllocationStatus; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; +import org.opensearch.cluster.routing.allocation.NodeAllocationResult; +import org.opensearch.cluster.routing.allocation.NodeAllocationResult.ShardStoreInfo; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.decider.Decision; +import org.opensearch.cluster.routing.allocation.decider.Decision.Type; +import org.opensearch.env.ShardLockObtainFailedException; +import org.opensearch.gateway.AsyncBatchShardFetch.FetchResult; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * PrimaryShardBatchAllocator is similar to {@link org.opensearch.gateway.PrimaryShardAllocator} only difference is + * that it can allocate multiple unassigned primary shards wherein PrimaryShardAllocator can only allocate single + * unassigned shard. + * The primary shard batch allocator allocates multiple unassigned primary shards to nodes that hold + * valid copies of the unassigned primaries. It does this by iterating over all unassigned + * primary shards in the routing table and fetching shard metadata from each node in the cluster + * that holds a copy of the shard. The shard metadata from each node is compared against the + * set of valid allocation IDs and for all valid shard copies (if any), the primary shard batch allocator + * executes the allocation deciders to chose a copy to assign the primary shard to. + *

+ * Note that the PrimaryShardBatchAllocator does *not* allocate primaries on index creation + * (see {@link org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator}), + * nor does it allocate primaries when a primary shard failed and there is a valid replica + * copy that can immediately be promoted to primary, as this takes place in {@link RoutingNodes#failShard}. + * + * @opensearch.internal + */ +public abstract class PrimaryShardBatchAllocator extends BaseGatewayShardAllocator { + /** + * Is the allocator responsible for allocating the given {@link ShardRouting}? + */ + private static boolean isResponsibleFor(final ShardRouting shard) { + return shard.primary() // must be primary + && shard.unassigned() // must be unassigned + // only handle either an existing store or a snapshot recovery + && (shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE + || shard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT); + } + + abstract protected FetchResult fetchData(Set shardsEligibleForFetch, + Set inEligibleShards, + RoutingAllocation allocation); + + @Override + public AllocateUnassignedDecision makeAllocationDecision(ShardRouting unassignedShard, + RoutingAllocation allocation, + Logger logger) { + + return makeAllocationDecision(new HashSet<>(Collections.singletonList(unassignedShard)), + allocation, logger).get(unassignedShard); + } + + /** + * Build allocation decisions for all the shards present in the batch identified by batchId. + * @param shards set of shards given for allocation + * @param allocation current allocation of all the shards + * @param logger logger used for logging + * @return shard to allocation decision map + */ + @Override + public HashMap makeAllocationDecision(Set shards, + RoutingAllocation allocation, + Logger logger) { + HashMap shardAllocationDecisions = new HashMap<>(); + final boolean explain = allocation.debugDecision(); + Set shardsEligibleForFetch = new HashSet<>(); + Set shardsNotEligibleForFetch = new HashSet<>(); + // identify ineligible shards + for (ShardRouting shard : shards) { + AllocateUnassignedDecision decision = skipSnapshotRestore(shard, allocation); + if (decision != null) { + shardsNotEligibleForFetch.add(shard); + shardAllocationDecisions.put(shard, decision); + } else { + shardsEligibleForFetch.add(shard); + } + } + // only fetch data for eligible shards + final FetchResult shardsState = fetchData(shardsEligibleForFetch, shardsNotEligibleForFetch, allocation); + // Note : shardsState contain the Data, there key is DiscoveryNode but value is Map so to get one shard level data (from all the nodes), we'll traverse the map + // and construct the nodeShardState along the way before making any allocation decision. As metadata for a + // particular shard is needed from all the discovery nodes. + + // process the received data + for (ShardRouting unassignedShard : shardsEligibleForFetch) { + if (shardsState.hasData() == false) { + // if fetching is not done, add that no decision in the resultant map + allocation.setHasPendingAsyncFetch(); + List nodeDecisions = null; + if (explain) { + nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation); + } + shardAllocationDecisions.put(unassignedShard, + AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, + nodeDecisions)); + } else { + Map nodeResponses = shardsState.getData(); + Map shardData = new HashMap<>(); + // build data for a shard from all the nodes + for (Map.Entry nodeEntry : nodeResponses.entrySet()) { + shardData.put(nodeEntry.getKey(), + nodeEntry.getValue().getNodeGatewayStartedShardsBatch().get(unassignedShard.shardId())); + } + // get allocation decision for this shard + shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, + shardData, logger)); + } + } + return shardAllocationDecisions; + } + + /** + * Below code is very similar to {@link org.opensearch.gateway.PrimaryShardAllocator} class makeAllocationDecision, + * only difference is that NodeGatewayStartedShards object doesn't have the DiscoveryNode object as + * BaseNodeResponse. So, DiscoveryNode reference is passed in Map so + * corresponding DiscoveryNode object can be used for rest of the implementation. Also, DiscoveryNode object + * reference is added in DecidedNode class to achieve same use case of accessing corresponding DiscoveryNode object. + * @param unassignedShard unassigned shard routing + * @param allocation routing allocation object + * @param shardState shard metadata fetched from all data nodes + * @param logger logger + * @return allocation decision taken for this shard + */ + private AllocateUnassignedDecision getAllocationDecision(ShardRouting unassignedShard, RoutingAllocation allocation, + Map shardState, + Logger logger) { + final boolean explain = allocation.debugDecision(); + // don't create a new IndexSetting object for every shard as this could cause a lot of garbage + // on cluster restart if we allocate a boat load of shards + final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(unassignedShard.index()); + final Set inSyncAllocationIds = indexMetadata.inSyncAllocationIds(unassignedShard.id()); + final boolean snapshotRestore = unassignedShard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT; + + assert inSyncAllocationIds.isEmpty() == false; + // use in-sync allocation ids to select nodes + final PrimaryShardBatchAllocator.NodeShardsResult nodeShardsResult = buildNodeShardsResult( + unassignedShard, + snapshotRestore, + allocation.getIgnoreNodes(unassignedShard.shardId()), + inSyncAllocationIds, + shardState, + logger + ); + final boolean enoughAllocationsFound = nodeShardsResult.orderedAllocationCandidates.size() > 0; + logger.debug( + "[{}][{}]: found {} allocation candidates of {} based on allocation ids: [{}]", + unassignedShard.index(), + unassignedShard.id(), + nodeShardsResult.orderedAllocationCandidates.size(), + unassignedShard, + inSyncAllocationIds + ); + + if (enoughAllocationsFound == false) { + if (snapshotRestore) { + // let BalancedShardsAllocator take care of allocating this shard + logger.debug( + "[{}][{}]: missing local data, will restore from [{}]", + unassignedShard.index(), + unassignedShard.id(), + unassignedShard.recoverySource() + ); + return AllocateUnassignedDecision.NOT_TAKEN; + } else { + // We have a shard that was previously allocated, but we could not find a valid shard copy to allocate the primary. + // We could just be waiting for the node that holds the primary to start back up, in which case the allocation for + // this shard will be picked up when the node joins and we do another allocation reroute + logger.debug( + "[{}][{}]: not allocating, number_of_allocated_shards_found [{}]", + unassignedShard.index(), + unassignedShard.id(), + nodeShardsResult.allocationsFound + ); + return AllocateUnassignedDecision.no( + AllocationStatus.NO_VALID_SHARD_COPY, + explain ? buildNodeDecisions(null, shardState, inSyncAllocationIds) : null + ); + } + } + + NodesToAllocate nodesToAllocate = buildNodesToAllocate( + allocation, + nodeShardsResult.orderedAllocationCandidates, + unassignedShard, + false + ); + DiscoveryNode node = null; + String allocationId = null; + boolean throttled = false; + if (nodesToAllocate.yesNodeShards.isEmpty() == false) { + DecidedNode decidedNode = nodesToAllocate.yesNodeShards.get(0); + logger.debug( + "[{}][{}]: allocating [{}] to [{}] on primary allocation", + unassignedShard.index(), + unassignedShard.id(), + unassignedShard, + decidedNode.getNode() + ); + node = decidedNode.getNode(); + allocationId = decidedNode.nodeShardState.allocationId(); + } else if (nodesToAllocate.throttleNodeShards.isEmpty() && !nodesToAllocate.noNodeShards.isEmpty()) { + // The deciders returned a NO decision for all nodes with shard copies, so we check if primary shard + // can be force-allocated to one of the nodes. + nodesToAllocate = buildNodesToAllocate(allocation, nodeShardsResult.orderedAllocationCandidates, unassignedShard, true); + if (nodesToAllocate.yesNodeShards.isEmpty() == false) { + final DecidedNode decidedNode = nodesToAllocate.yesNodeShards.get(0); + final NodeGatewayStartedShards nodeShardState = decidedNode.nodeShardState; + logger.debug( + "[{}][{}]: allocating [{}] to [{}] on forced primary allocation", + unassignedShard.index(), + unassignedShard.id(), + unassignedShard, + decidedNode.getNode() + ); + node = decidedNode.getNode(); + allocationId = nodeShardState.allocationId(); + } else if (nodesToAllocate.throttleNodeShards.isEmpty() == false) { + logger.debug( + "[{}][{}]: throttling allocation [{}] to [{}] on forced primary allocation", + unassignedShard.index(), + unassignedShard.id(), + unassignedShard, + nodesToAllocate.throttleNodeShards + ); + throttled = true; + } else { + logger.debug( + "[{}][{}]: forced primary allocation denied [{}]", + unassignedShard.index(), + unassignedShard.id(), + unassignedShard + ); + } + } else { + // we are throttling this, since we are allowed to allocate to this node but there are enough allocations + // taking place on the node currently, ignore it for now + logger.debug( + "[{}][{}]: throttling allocation [{}] to [{}] on primary allocation", + unassignedShard.index(), + unassignedShard.id(), + unassignedShard, + nodesToAllocate.throttleNodeShards + ); + throttled = true; + } + + List nodeResults = null; + if (explain) { + nodeResults = buildNodeDecisions(nodesToAllocate, shardState, inSyncAllocationIds); + } + if (allocation.hasPendingAsyncFetch()) { + return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeResults); + } else if (node != null) { + return AllocateUnassignedDecision.yes(node, allocationId, nodeResults, false); + } else if (throttled) { + return AllocateUnassignedDecision.throttle(nodeResults); + } else { + return AllocateUnassignedDecision.no(AllocationStatus.DECIDERS_NO, nodeResults, true); + } + } + + /** + * Skip doing fetchData call for a shard if recovery mode is snapshot. Also do not take decision if allocator is + * not responsible for this particular shard. + * @param unassignedShard unassigned shard routing + * @param allocation routing allocation object + * @return allocation decision taken for this shard + */ + private AllocateUnassignedDecision skipSnapshotRestore(ShardRouting unassignedShard, RoutingAllocation allocation) { + if (isResponsibleFor(unassignedShard) == false) { + // this allocator is not responsible for allocating this shard + return AllocateUnassignedDecision.NOT_TAKEN; + } + final boolean explain = allocation.debugDecision(); + if (unassignedShard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT + && allocation.snapshotShardSizeInfo().getShardSize(unassignedShard) == null) { + List nodeDecisions = null; + if (explain) { + nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation); + } + return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions); + } + return null; + } + + /** + * Builds a map of nodes to the corresponding allocation decisions for those nodes. + */ + private static List buildNodeDecisions( + NodesToAllocate nodesToAllocate, + Map fetchedShardData, + Set inSyncAllocationIds + ) { + List nodeResults = new ArrayList<>(); + Map ineligibleShards; + if (nodesToAllocate != null) { + final Set discoNodes = new HashSet<>(); + nodeResults.addAll( + Stream.of(nodesToAllocate.yesNodeShards, nodesToAllocate.throttleNodeShards, nodesToAllocate.noNodeShards) + .flatMap(Collection::stream) + .map(dnode -> { + discoNodes.add(dnode.getNode()); + return new NodeAllocationResult( + dnode.getNode(), + shardStoreInfo(dnode.nodeShardState, inSyncAllocationIds), + dnode.decision + ); + }) + .collect(Collectors.toList()) + ); + + ineligibleShards = fetchedShardData.entrySet() + .stream() + .filter(shardData -> discoNodes.contains(shardData.getKey()) == false) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } else { + // there were no shard copies that were eligible for being assigned the allocation, + // so all fetched shard data are ineligible shards + ineligibleShards = fetchedShardData; + } + + nodeResults.addAll( + ineligibleShards.entrySet().stream() + .map(shardData -> new NodeAllocationResult(shardData.getKey(), shardStoreInfo(shardData.getValue(), + inSyncAllocationIds), null)) + .collect(Collectors.toList()) + ); + + return nodeResults; + } + + private static ShardStoreInfo shardStoreInfo(NodeGatewayStartedShards nodeShardState, Set inSyncAllocationIds) { + final Exception storeErr = nodeShardState.storeException(); + final boolean inSync = nodeShardState.allocationId() != null && inSyncAllocationIds.contains(nodeShardState.allocationId()); + return new ShardStoreInfo(nodeShardState.allocationId(), inSync, storeErr); + } + + private static final Comparator NO_STORE_EXCEPTION_FIRST_COMPARATOR = Comparator.comparing( + (NodeGatewayStartedShards state) -> state.storeException() == null + ).reversed(); + private static final Comparator PRIMARY_FIRST_COMPARATOR = Comparator.comparing( + NodeGatewayStartedShards::primary + ).reversed(); + + private static final Comparator HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR = Comparator.comparing( + NodeGatewayStartedShards::replicationCheckpoint, + Comparator.nullsLast(Comparator.naturalOrder()) + ); + + /** + * Builds a list of nodes. If matchAnyShard is set to false, only nodes that have an allocation id matching + * inSyncAllocationIds are added to the list. Otherwise, any node that has a shard is added to the list, but + * entries with matching allocation id are always at the front of the list. + */ + protected static NodeShardsResult buildNodeShardsResult( + ShardRouting shard, + boolean matchAnyShard, + Set ignoreNodes, + Set inSyncAllocationIds, + Map shardState, + Logger logger + ) { + /** + * Orders the active shards copies based on below comparators + * 1. No store exception i.e. shard copy is readable + * 2. Prefer previous primary shard + * 3. Prefer shard copy with the highest replication checkpoint. It is NO-OP for doc rep enabled indices. + */ + final Comparator comparator; // allocation preference + if (matchAnyShard) { + // prefer shards with matching allocation ids + Comparator matchingAllocationsFirst = Comparator.comparing( + (NodeGatewayStartedShards state) -> inSyncAllocationIds.contains(state.allocationId()) + ).reversed(); + comparator = matchingAllocationsFirst.thenComparing(NO_STORE_EXCEPTION_FIRST_COMPARATOR) + .thenComparing(PRIMARY_FIRST_COMPARATOR) + .thenComparing(HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR); + } else { + comparator = NO_STORE_EXCEPTION_FIRST_COMPARATOR.thenComparing(PRIMARY_FIRST_COMPARATOR) + .thenComparing(HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR); + } + // TreeMap will sort the entries based on key, comparator is assigned above + TreeMap shardStatesToNode = new TreeMap<>(comparator); + int numberOfAllocationsFound = 0; + for (Map.Entry nodeShardStateEntry : shardState.entrySet()) { + DiscoveryNode node = nodeShardStateEntry.getKey(); + NodeGatewayStartedShards nodeShardState = nodeShardStateEntry.getValue(); + String allocationId = nodeShardState.allocationId(); + + if (ignoreNodes.contains(node.getId())) { + continue; + } + + if (nodeShardState.storeException() == null) { + if (allocationId == null) { + logger.trace("[{}] on node [{}] has no shard state information", shard, node); + } else { + logger.trace("[{}] on node [{}] has allocation id [{}]", shard, node, allocationId); + } + } else { + final String finalAllocationId = allocationId; + if (nodeShardState.storeException() instanceof ShardLockObtainFailedException) { + logger.trace( + () -> new ParameterizedMessage( + "[{}] on node [{}] has allocation id [{}] but the store can not be " + + "opened as it's locked, treating as valid shard", + shard, + node, + finalAllocationId + ), + nodeShardState.storeException() + ); + } else { + logger.trace( + () -> new ParameterizedMessage( + "[{}] on node [{}] has allocation id [{}] but the store can not be " + "opened, treating as no allocation id", + shard, + node, + finalAllocationId + ), + nodeShardState.storeException() + ); + allocationId = null; + } + } + + if (allocationId != null) { + assert nodeShardState.storeException() == null || nodeShardState.storeException() instanceof ShardLockObtainFailedException + : "only allow store that can be opened or that throws a ShardLockObtainFailedException while being opened but got a " + + "store throwing " + + nodeShardState.storeException(); + numberOfAllocationsFound++; + if (matchAnyShard || inSyncAllocationIds.contains(nodeShardState.allocationId())) { + shardStatesToNode.put(nodeShardState, node); + } + } + } + + if (logger.isTraceEnabled()) { + logger.trace( + "{} candidates for allocation: {}", + shard, + shardState.keySet().stream().map(DiscoveryNode::getName).collect(Collectors.joining(", ")) + ); + } + return new NodeShardsResult(shardStatesToNode, numberOfAllocationsFound); + } + + /** + * Split the list of node shard states into groups yes/no/throttle based on allocation deciders + */ + private static NodesToAllocate buildNodesToAllocate( + RoutingAllocation allocation, + TreeMap shardStateToNode, + ShardRouting shardRouting, + boolean forceAllocate + ) { + List yesNodeShards = new ArrayList<>(); + List throttledNodeShards = new ArrayList<>(); + List noNodeShards = new ArrayList<>(); + for (Map.Entry nodeShardState : shardStateToNode.entrySet()) { + RoutingNode node = allocation.routingNodes().node(nodeShardState.getValue().getId()); + if (node == null) { + continue; + } + + Decision decision = forceAllocate + ? allocation.deciders().canForceAllocatePrimary(shardRouting, node, allocation) + : allocation.deciders().canAllocate(shardRouting, node, allocation); + DecidedNode decidedNode = new DecidedNode(nodeShardState.getKey(), decision, nodeShardState.getValue()); + if (decision.type() == Type.THROTTLE) { + throttledNodeShards.add(decidedNode); + } else if (decision.type() == Type.NO) { + noNodeShards.add(decidedNode); + } else { + yesNodeShards.add(decidedNode); + } + } + return new NodesToAllocate( + Collections.unmodifiableList(yesNodeShards), + Collections.unmodifiableList(throttledNodeShards), + Collections.unmodifiableList(noNodeShards) + ); + } + + private static class NodeShardsResult { + final TreeMap orderedAllocationCandidates; + final int allocationsFound; + + NodeShardsResult(TreeMap orderedAllocationCandidates, int allocationsFound) { + this.orderedAllocationCandidates = orderedAllocationCandidates; + this.allocationsFound = allocationsFound; + } + } + + static class NodesToAllocate { + final List yesNodeShards; + final List throttleNodeShards; + final List noNodeShards; + + NodesToAllocate(List yesNodeShards, List throttleNodeShards, List noNodeShards) { + this.yesNodeShards = yesNodeShards; + this.throttleNodeShards = throttleNodeShards; + this.noNodeShards = noNodeShards; + } + } + + /** + * This class encapsulates the shard state retrieved from a node and the decision that was made + * by the allocator for allocating to the node that holds the shard copy. + */ + private static class DecidedNode { + final NodeGatewayStartedShards nodeShardState; + final Decision decision; + final DiscoveryNode node; + + private DecidedNode(NodeGatewayStartedShards nodeShardState, Decision decision, DiscoveryNode node) { + this.nodeShardState = nodeShardState; + this.decision = decision; + this.node = node; + } + + public DiscoveryNode getNode() { + return node; + } + } +} From 6c383034184cafa6de55c251ca5bd32662a0947f Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Thu, 21 Sep 2023 16:29:01 +0530 Subject: [PATCH 02/10] Add unit tests and refactor PrimaryShardBatchAllocator Signed-off-by: Aman Khare --- .../gateway/PrimaryShardBatchAllocator.java | 540 +++--------------- .../PrimaryShardBatchAllocatorTests.java | 339 +++++++++++ 2 files changed, 410 insertions(+), 469 deletions(-) create mode 100644 server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 36c81100793cf..0dfd8d5718e54 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -8,38 +8,23 @@ package org.opensearch.gateway; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.routing.RecoverySource; -import org.opensearch.cluster.routing.RoutingNode; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.ShardRouting; -import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.UnassignedInfo.AllocationStatus; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.NodeAllocationResult; -import org.opensearch.cluster.routing.allocation.NodeAllocationResult.ShardStoreInfo; import org.opensearch.cluster.routing.allocation.RoutingAllocation; -import org.opensearch.cluster.routing.allocation.decider.Decision; -import org.opensearch.cluster.routing.allocation.decider.Decision.Type; -import org.opensearch.env.ShardLockObtainFailedException; -import org.opensearch.gateway.AsyncBatchShardFetch.FetchResult; +import org.opensearch.gateway.AsyncShardFetch.FetchResult; import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards; -import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeMap; -import java.util.stream.Collectors; -import java.util.stream.Stream; /** * PrimaryShardBatchAllocator is similar to {@link org.opensearch.gateway.PrimaryShardAllocator} only difference is @@ -59,49 +44,48 @@ * * @opensearch.internal */ -public abstract class PrimaryShardBatchAllocator extends BaseGatewayShardAllocator { - /** - * Is the allocator responsible for allocating the given {@link ShardRouting}? - */ - private static boolean isResponsibleFor(final ShardRouting shard) { - return shard.primary() // must be primary - && shard.unassigned() // must be unassigned - // only handle either an existing store or a snapshot recovery - && (shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE - || shard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT); - } +public abstract class PrimaryShardBatchAllocator extends PrimaryShardAllocator { - abstract protected FetchResult fetchData(Set shardsEligibleForFetch, - Set inEligibleShards, - RoutingAllocation allocation); + abstract protected FetchResult fetchData( + Set shardsEligibleForFetch, + Set inEligibleShards, + RoutingAllocation allocation + ); + + protected FetchResult fetchData( + ShardRouting shard, + RoutingAllocation allocation + ) { + return null; + } @Override - public AllocateUnassignedDecision makeAllocationDecision(ShardRouting unassignedShard, - RoutingAllocation allocation, - Logger logger) { + public AllocateUnassignedDecision makeAllocationDecision(ShardRouting unassignedShard, RoutingAllocation allocation, Logger logger) { - return makeAllocationDecision(new HashSet<>(Collections.singletonList(unassignedShard)), - allocation, logger).get(unassignedShard); + return makeAllocationDecision(new HashSet<>(Collections.singletonList(unassignedShard)), allocation, logger).get(unassignedShard); } /** - * Build allocation decisions for all the shards present in the batch identified by batchId. - * @param shards set of shards given for allocation + * Build allocation decisions for all the shards given to this allocator.. + * + * @param shards set of shards given for allocation * @param allocation current allocation of all the shards - * @param logger logger used for logging + * @param logger logger used for logging * @return shard to allocation decision map */ @Override - public HashMap makeAllocationDecision(Set shards, - RoutingAllocation allocation, - Logger logger) { + public HashMap makeAllocationDecision( + Set shards, + RoutingAllocation allocation, + Logger logger + ) { HashMap shardAllocationDecisions = new HashMap<>(); final boolean explain = allocation.debugDecision(); Set shardsEligibleForFetch = new HashSet<>(); Set shardsNotEligibleForFetch = new HashSet<>(); // identify ineligible shards for (ShardRouting shard : shards) { - AllocateUnassignedDecision decision = skipSnapshotRestore(shard, allocation); + AllocateUnassignedDecision decision = getInEligibleShardDecision(shard, allocation); if (decision != null) { shardsNotEligibleForFetch.add(shard); shardAllocationDecisions.put(shard, decision); @@ -109,12 +93,16 @@ public HashMap makeAllocationDecision( shardsEligibleForFetch.add(shard); } } + // Do not call fetchData if there are no eligible shards + if (shardsEligibleForFetch.size() == 0) { + return shardAllocationDecisions; + } // only fetch data for eligible shards - final FetchResult shardsState = fetchData(shardsEligibleForFetch, shardsNotEligibleForFetch, allocation); - // Note : shardsState contain the Data, there key is DiscoveryNode but value is Map so to get one shard level data (from all the nodes), we'll traverse the map - // and construct the nodeShardState along the way before making any allocation decision. As metadata for a - // particular shard is needed from all the discovery nodes. + final FetchResult shardsState = fetchData( + shardsEligibleForFetch, + shardsNotEligibleForFetch, + allocation + ); // process the received data for (ShardRouting unassignedShard : shardsEligibleForFetch) { @@ -125,438 +113,52 @@ public HashMap makeAllocationDecision( if (explain) { nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation); } - shardAllocationDecisions.put(unassignedShard, - AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, - nodeDecisions)); - } else { - Map nodeResponses = shardsState.getData(); - Map shardData = new HashMap<>(); - // build data for a shard from all the nodes - for (Map.Entry nodeEntry : nodeResponses.entrySet()) { - shardData.put(nodeEntry.getKey(), - nodeEntry.getValue().getNodeGatewayStartedShardsBatch().get(unassignedShard.shardId())); - } - // get allocation decision for this shard - shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, - shardData, logger)); - } - } - return shardAllocationDecisions; - } - - /** - * Below code is very similar to {@link org.opensearch.gateway.PrimaryShardAllocator} class makeAllocationDecision, - * only difference is that NodeGatewayStartedShards object doesn't have the DiscoveryNode object as - * BaseNodeResponse. So, DiscoveryNode reference is passed in Map so - * corresponding DiscoveryNode object can be used for rest of the implementation. Also, DiscoveryNode object - * reference is added in DecidedNode class to achieve same use case of accessing corresponding DiscoveryNode object. - * @param unassignedShard unassigned shard routing - * @param allocation routing allocation object - * @param shardState shard metadata fetched from all data nodes - * @param logger logger - * @return allocation decision taken for this shard - */ - private AllocateUnassignedDecision getAllocationDecision(ShardRouting unassignedShard, RoutingAllocation allocation, - Map shardState, - Logger logger) { - final boolean explain = allocation.debugDecision(); - // don't create a new IndexSetting object for every shard as this could cause a lot of garbage - // on cluster restart if we allocate a boat load of shards - final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(unassignedShard.index()); - final Set inSyncAllocationIds = indexMetadata.inSyncAllocationIds(unassignedShard.id()); - final boolean snapshotRestore = unassignedShard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT; - - assert inSyncAllocationIds.isEmpty() == false; - // use in-sync allocation ids to select nodes - final PrimaryShardBatchAllocator.NodeShardsResult nodeShardsResult = buildNodeShardsResult( - unassignedShard, - snapshotRestore, - allocation.getIgnoreNodes(unassignedShard.shardId()), - inSyncAllocationIds, - shardState, - logger - ); - final boolean enoughAllocationsFound = nodeShardsResult.orderedAllocationCandidates.size() > 0; - logger.debug( - "[{}][{}]: found {} allocation candidates of {} based on allocation ids: [{}]", - unassignedShard.index(), - unassignedShard.id(), - nodeShardsResult.orderedAllocationCandidates.size(), - unassignedShard, - inSyncAllocationIds - ); - - if (enoughAllocationsFound == false) { - if (snapshotRestore) { - // let BalancedShardsAllocator take care of allocating this shard - logger.debug( - "[{}][{}]: missing local data, will restore from [{}]", - unassignedShard.index(), - unassignedShard.id(), - unassignedShard.recoverySource() - ); - return AllocateUnassignedDecision.NOT_TAKEN; - } else { - // We have a shard that was previously allocated, but we could not find a valid shard copy to allocate the primary. - // We could just be waiting for the node that holds the primary to start back up, in which case the allocation for - // this shard will be picked up when the node joins and we do another allocation reroute - logger.debug( - "[{}][{}]: not allocating, number_of_allocated_shards_found [{}]", - unassignedShard.index(), - unassignedShard.id(), - nodeShardsResult.allocationsFound - ); - return AllocateUnassignedDecision.no( - AllocationStatus.NO_VALID_SHARD_COPY, - explain ? buildNodeDecisions(null, shardState, inSyncAllocationIds) : null - ); - } - } - - NodesToAllocate nodesToAllocate = buildNodesToAllocate( - allocation, - nodeShardsResult.orderedAllocationCandidates, - unassignedShard, - false - ); - DiscoveryNode node = null; - String allocationId = null; - boolean throttled = false; - if (nodesToAllocate.yesNodeShards.isEmpty() == false) { - DecidedNode decidedNode = nodesToAllocate.yesNodeShards.get(0); - logger.debug( - "[{}][{}]: allocating [{}] to [{}] on primary allocation", - unassignedShard.index(), - unassignedShard.id(), - unassignedShard, - decidedNode.getNode() - ); - node = decidedNode.getNode(); - allocationId = decidedNode.nodeShardState.allocationId(); - } else if (nodesToAllocate.throttleNodeShards.isEmpty() && !nodesToAllocate.noNodeShards.isEmpty()) { - // The deciders returned a NO decision for all nodes with shard copies, so we check if primary shard - // can be force-allocated to one of the nodes. - nodesToAllocate = buildNodesToAllocate(allocation, nodeShardsResult.orderedAllocationCandidates, unassignedShard, true); - if (nodesToAllocate.yesNodeShards.isEmpty() == false) { - final DecidedNode decidedNode = nodesToAllocate.yesNodeShards.get(0); - final NodeGatewayStartedShards nodeShardState = decidedNode.nodeShardState; - logger.debug( - "[{}][{}]: allocating [{}] to [{}] on forced primary allocation", - unassignedShard.index(), - unassignedShard.id(), - unassignedShard, - decidedNode.getNode() - ); - node = decidedNode.getNode(); - allocationId = nodeShardState.allocationId(); - } else if (nodesToAllocate.throttleNodeShards.isEmpty() == false) { - logger.debug( - "[{}][{}]: throttling allocation [{}] to [{}] on forced primary allocation", - unassignedShard.index(), - unassignedShard.id(), + shardAllocationDecisions.put( unassignedShard, - nodesToAllocate.throttleNodeShards + AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions) ); - throttled = true; } else { - logger.debug( - "[{}][{}]: forced primary allocation denied [{}]", - unassignedShard.index(), - unassignedShard.id(), - unassignedShard - ); - } - } else { - // we are throttling this, since we are allowed to allocate to this node but there are enough allocations - // taking place on the node currently, ignore it for now - logger.debug( - "[{}][{}]: throttling allocation [{}] to [{}] on primary allocation", - unassignedShard.index(), - unassignedShard.id(), - unassignedShard, - nodesToAllocate.throttleNodeShards - ); - throttled = true; - } - - List nodeResults = null; - if (explain) { - nodeResults = buildNodeDecisions(nodesToAllocate, shardState, inSyncAllocationIds); - } - if (allocation.hasPendingAsyncFetch()) { - return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeResults); - } else if (node != null) { - return AllocateUnassignedDecision.yes(node, allocationId, nodeResults, false); - } else if (throttled) { - return AllocateUnassignedDecision.throttle(nodeResults); - } else { - return AllocateUnassignedDecision.no(AllocationStatus.DECIDERS_NO, nodeResults, true); - } - } - - /** - * Skip doing fetchData call for a shard if recovery mode is snapshot. Also do not take decision if allocator is - * not responsible for this particular shard. - * @param unassignedShard unassigned shard routing - * @param allocation routing allocation object - * @return allocation decision taken for this shard - */ - private AllocateUnassignedDecision skipSnapshotRestore(ShardRouting unassignedShard, RoutingAllocation allocation) { - if (isResponsibleFor(unassignedShard) == false) { - // this allocator is not responsible for allocating this shard - return AllocateUnassignedDecision.NOT_TAKEN; - } - final boolean explain = allocation.debugDecision(); - if (unassignedShard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT - && allocation.snapshotShardSizeInfo().getShardSize(unassignedShard) == null) { - List nodeDecisions = null; - if (explain) { - nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation); - } - return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions); - } - return null; - } - /** - * Builds a map of nodes to the corresponding allocation decisions for those nodes. - */ - private static List buildNodeDecisions( - NodesToAllocate nodesToAllocate, - Map fetchedShardData, - Set inSyncAllocationIds - ) { - List nodeResults = new ArrayList<>(); - Map ineligibleShards; - if (nodesToAllocate != null) { - final Set discoNodes = new HashSet<>(); - nodeResults.addAll( - Stream.of(nodesToAllocate.yesNodeShards, nodesToAllocate.throttleNodeShards, nodesToAllocate.noNodeShards) - .flatMap(Collection::stream) - .map(dnode -> { - discoNodes.add(dnode.getNode()); - return new NodeAllocationResult( - dnode.getNode(), - shardStoreInfo(dnode.nodeShardState, inSyncAllocationIds), - dnode.decision - ); - }) - .collect(Collectors.toList()) - ); - - ineligibleShards = fetchedShardData.entrySet() - .stream() - .filter(shardData -> discoNodes.contains(shardData.getKey()) == false) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } else { - // there were no shard copies that were eligible for being assigned the allocation, - // so all fetched shard data are ineligible shards - ineligibleShards = fetchedShardData; - } - - nodeResults.addAll( - ineligibleShards.entrySet().stream() - .map(shardData -> new NodeAllocationResult(shardData.getKey(), shardStoreInfo(shardData.getValue(), - inSyncAllocationIds), null)) - .collect(Collectors.toList()) - ); - - return nodeResults; - } - - private static ShardStoreInfo shardStoreInfo(NodeGatewayStartedShards nodeShardState, Set inSyncAllocationIds) { - final Exception storeErr = nodeShardState.storeException(); - final boolean inSync = nodeShardState.allocationId() != null && inSyncAllocationIds.contains(nodeShardState.allocationId()); - return new ShardStoreInfo(nodeShardState.allocationId(), inSync, storeErr); - } - - private static final Comparator NO_STORE_EXCEPTION_FIRST_COMPARATOR = Comparator.comparing( - (NodeGatewayStartedShards state) -> state.storeException() == null - ).reversed(); - private static final Comparator PRIMARY_FIRST_COMPARATOR = Comparator.comparing( - NodeGatewayStartedShards::primary - ).reversed(); - - private static final Comparator HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR = Comparator.comparing( - NodeGatewayStartedShards::replicationCheckpoint, - Comparator.nullsLast(Comparator.naturalOrder()) - ); - - /** - * Builds a list of nodes. If matchAnyShard is set to false, only nodes that have an allocation id matching - * inSyncAllocationIds are added to the list. Otherwise, any node that has a shard is added to the list, but - * entries with matching allocation id are always at the front of the list. - */ - protected static NodeShardsResult buildNodeShardsResult( - ShardRouting shard, - boolean matchAnyShard, - Set ignoreNodes, - Set inSyncAllocationIds, - Map shardState, - Logger logger - ) { - /** - * Orders the active shards copies based on below comparators - * 1. No store exception i.e. shard copy is readable - * 2. Prefer previous primary shard - * 3. Prefer shard copy with the highest replication checkpoint. It is NO-OP for doc rep enabled indices. - */ - final Comparator comparator; // allocation preference - if (matchAnyShard) { - // prefer shards with matching allocation ids - Comparator matchingAllocationsFirst = Comparator.comparing( - (NodeGatewayStartedShards state) -> inSyncAllocationIds.contains(state.allocationId()) - ).reversed(); - comparator = matchingAllocationsFirst.thenComparing(NO_STORE_EXCEPTION_FIRST_COMPARATOR) - .thenComparing(PRIMARY_FIRST_COMPARATOR) - .thenComparing(HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR); - } else { - comparator = NO_STORE_EXCEPTION_FIRST_COMPARATOR.thenComparing(PRIMARY_FIRST_COMPARATOR) - .thenComparing(HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR); - } - // TreeMap will sort the entries based on key, comparator is assigned above - TreeMap shardStatesToNode = new TreeMap<>(comparator); - int numberOfAllocationsFound = 0; - for (Map.Entry nodeShardStateEntry : shardState.entrySet()) { - DiscoveryNode node = nodeShardStateEntry.getKey(); - NodeGatewayStartedShards nodeShardState = nodeShardStateEntry.getValue(); - String allocationId = nodeShardState.allocationId(); - - if (ignoreNodes.contains(node.getId())) { - continue; - } - - if (nodeShardState.storeException() == null) { - if (allocationId == null) { - logger.trace("[{}] on node [{}] has no shard state information", shard, node); - } else { - logger.trace("[{}] on node [{}] has allocation id [{}]", shard, node, allocationId); - } - } else { - final String finalAllocationId = allocationId; - if (nodeShardState.storeException() instanceof ShardLockObtainFailedException) { - logger.trace( - () -> new ParameterizedMessage( - "[{}] on node [{}] has allocation id [{}] but the store can not be " - + "opened as it's locked, treating as valid shard", - shard, - node, - finalAllocationId - ), - nodeShardState.storeException() - ); - } else { - logger.trace( - () -> new ParameterizedMessage( - "[{}] on node [{}] has allocation id [{}] but the store can not be " + "opened, treating as no allocation id", - shard, - node, - finalAllocationId - ), - nodeShardState.storeException() - ); - allocationId = null; - } - } - - if (allocationId != null) { - assert nodeShardState.storeException() == null || nodeShardState.storeException() instanceof ShardLockObtainFailedException - : "only allow store that can be opened or that throws a ShardLockObtainFailedException while being opened but got a " - + "store throwing " - + nodeShardState.storeException(); - numberOfAllocationsFound++; - if (matchAnyShard || inSyncAllocationIds.contains(nodeShardState.allocationId())) { - shardStatesToNode.put(nodeShardState, node); - } + NodeShardStates nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); + // get allocation decision for this shard + shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger)); } } - - if (logger.isTraceEnabled()) { - logger.trace( - "{} candidates for allocation: {}", - shard, - shardState.keySet().stream().map(DiscoveryNode::getName).collect(Collectors.joining(", ")) - ); - } - return new NodeShardsResult(shardStatesToNode, numberOfAllocationsFound); + return shardAllocationDecisions; } /** - * Split the list of node shard states into groups yes/no/throttle based on allocation deciders + * shardsState contain the Data, there key is DiscoveryNode but value is Map so to get one shard level data (from all the nodes), we'll traverse the map + * and construct the nodeShardState along the way before making any allocation decision. As metadata for a + * particular shard is needed from all the discovery nodes. + * + * @param unassignedShard unassigned shard + * @param shardsState fetch data result for the whole batch + * @return shard state returned from each node */ - private static NodesToAllocate buildNodesToAllocate( - RoutingAllocation allocation, - TreeMap shardStateToNode, - ShardRouting shardRouting, - boolean forceAllocate + private static NodeShardStates adaptToNodeShardStates( + ShardRouting unassignedShard, + FetchResult shardsState ) { - List yesNodeShards = new ArrayList<>(); - List throttledNodeShards = new ArrayList<>(); - List noNodeShards = new ArrayList<>(); - for (Map.Entry nodeShardState : shardStateToNode.entrySet()) { - RoutingNode node = allocation.routingNodes().node(nodeShardState.getValue().getId()); - if (node == null) { - continue; - } - - Decision decision = forceAllocate - ? allocation.deciders().canForceAllocatePrimary(shardRouting, node, allocation) - : allocation.deciders().canAllocate(shardRouting, node, allocation); - DecidedNode decidedNode = new DecidedNode(nodeShardState.getKey(), decision, nodeShardState.getValue()); - if (decision.type() == Type.THROTTLE) { - throttledNodeShards.add(decidedNode); - } else if (decision.type() == Type.NO) { - noNodeShards.add(decidedNode); - } else { - yesNodeShards.add(decidedNode); - } - } - return new NodesToAllocate( - Collections.unmodifiableList(yesNodeShards), - Collections.unmodifiableList(throttledNodeShards), - Collections.unmodifiableList(noNodeShards) - ); - } - - private static class NodeShardsResult { - final TreeMap orderedAllocationCandidates; - final int allocationsFound; - - NodeShardsResult(TreeMap orderedAllocationCandidates, int allocationsFound) { - this.orderedAllocationCandidates = orderedAllocationCandidates; - this.allocationsFound = allocationsFound; - } - } - - static class NodesToAllocate { - final List yesNodeShards; - final List throttleNodeShards; - final List noNodeShards; - - NodesToAllocate(List yesNodeShards, List throttleNodeShards, List noNodeShards) { - this.yesNodeShards = yesNodeShards; - this.throttleNodeShards = throttleNodeShards; - this.noNodeShards = noNodeShards; - } - } - - /** - * This class encapsulates the shard state retrieved from a node and the decision that was made - * by the allocator for allocating to the node that holds the shard copy. - */ - private static class DecidedNode { - final NodeGatewayStartedShards nodeShardState; - final Decision decision; - final DiscoveryNode node; - - private DecidedNode(NodeGatewayStartedShards nodeShardState, Decision decision, DiscoveryNode node) { - this.nodeShardState = nodeShardState; - this.decision = decision; - this.node = node; - } - - public DiscoveryNode getNode() { - return node; - } + NodeShardStates nodeShardStates = new NodeShardStates(); + Map nodeResponses = shardsState.getData(); + + // build data for a shard from all the nodes + nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { + NodeGatewayStartedShards shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() + .get(unassignedShard.shardId()); + nodeShardStates.getNodeShardStates() + .add( + new NodeShardState( + node, + shardData.allocationId(), + shardData.primary(), + shardData.replicationCheckpoint(), + shardData.storeException() + ) + ); + }); + return nodeShardStates; } } diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java new file mode 100644 index 0000000000000..b8703192f09b3 --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java @@ -0,0 +1,339 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.gateway; + +import org.apache.lucene.codecs.Codec; +import org.junit.Before; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; +import org.opensearch.cluster.routing.allocation.AllocationDecision; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.common.Nullable; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.set.Sets; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.env.Environment; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.CodecService; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.test.IndexSettingsModule; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.opensearch.cluster.routing.UnassignedInfo.Reason.CLUSTER_RECOVERED; + +public class PrimaryShardBatchAllocatorTests extends OpenSearchAllocationTestCase { + + private final ShardId shardId = new ShardId("test", "_na_", 0); + private static Set shardsInBatch; + private final DiscoveryNode node1 = newNode("node1"); + private final DiscoveryNode node2 = newNode("node2"); + private final DiscoveryNode node3 = newNode("node3"); + private TestBatchAllocator batchAllocator; + + public static void setUpShards(int numberOfShards) { + shardsInBatch = new HashSet<>(); + for (int shardNumber = 0; shardNumber < numberOfShards; shardNumber++) { + ShardId shardId = new ShardId("test", "_na_", shardNumber); + shardsInBatch.add(shardId); + } + } + + @Before + public void buildTestAllocator() { + this.batchAllocator = new TestBatchAllocator(); + } + + private void allocateAllUnassigned(final RoutingAllocation allocation) { + final RoutingNodes.UnassignedShards.UnassignedIterator iterator = allocation.routingNodes().unassigned().iterator(); + while (iterator.hasNext()) { + batchAllocator.allocateUnassigned(iterator.next(), allocation, iterator); + } + } + + private void allocateAllUnassignedBatch(final RoutingAllocation allocation) { + final RoutingNodes.UnassignedShards.UnassignedIterator iterator = allocation.routingNodes().unassigned().iterator(); + Set shardsToBatch = new HashSet<>(); + while (iterator.hasNext()) { + shardsToBatch.add(iterator.next()); + } + batchAllocator.allocateUnassignedBatch(shardsToBatch, allocation); + } + + public void testMakeAllocationDecisionDataFetching() { + final RoutingAllocation allocation = routingAllocationWithOnePrimary(noAllocationDeciders(), CLUSTER_RECOVERED, "allocId1"); + + Set shards = new HashSet<>(); + allocateAllUnassignedBatch(allocation); + ShardRouting shard = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard(); + shards.add(shard); + HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); + // verify we get decisions for all the shards + assertEquals(shards.size(), allDecisions.size()); + assertEquals(shards, allDecisions.keySet()); + assertEquals(AllocationDecision.AWAITING_INFO, allDecisions.get(shard).getAllocationDecision()); + } + + public void testMakeAllocationDecisionForReplicaShard() { + final RoutingAllocation allocation = routingAllocationWithOnePrimary(noAllocationDeciders(), CLUSTER_RECOVERED, "allocId1"); + + List replicaShards = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).replicaShards(); + Set shards = new HashSet<>(replicaShards); + HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); + // verify we get decisions for all the shards + assertEquals(shards.size(), allDecisions.size()); + assertEquals(shards, allDecisions.keySet()); + assertEquals(false, allDecisions.get(replicaShards.get(0)).isDecisionTaken()); + } + + public void testMakeAllocationDecisionDataFetched() { + final RoutingAllocation allocation = routingAllocationWithOnePrimary(noAllocationDeciders(), CLUSTER_RECOVERED, "allocId1"); + + Set shards = new HashSet<>(); + ShardRouting shard = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard(); + shards.add(shard); + batchAllocator.addData(node1, "allocId1", true, new ReplicationCheckpoint(shardId, 20, 101, 1, Codec.getDefault().getName())); + HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); + // verify we get decisions for all the shards + assertEquals(shards.size(), allDecisions.size()); + assertEquals(shards, allDecisions.keySet()); + assertEquals(AllocationDecision.YES, allDecisions.get(shard).getAllocationDecision()); + } + + public void testMakeAllocationDecisionDataFetchedMultipleShards() { + setUpShards(2); + final RoutingAllocation allocation = routingAllocationWithMultiplePrimaries( + noAllocationDeciders(), + CLUSTER_RECOVERED, + 2, + 0, + "allocId-0", + "allocId-1" + ); + Set shards = new HashSet<>(); + for (ShardId shardId : shardsInBatch) { + ShardRouting shard = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard(); + allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard().recoverySource(); + shards.add(shard); + batchAllocator.addShardData( + node1, + "allocId-" + shardId.id(), + shardId, + true, + new ReplicationCheckpoint(shardId, 20, 101, 1, Codec.getDefault().getName()), + null + ); + } + HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); + // verify we get decisions for all the shards + assertEquals(shards.size(), allDecisions.size()); + assertEquals(shards, allDecisions.keySet()); + for (ShardRouting shard : shards) { + assertEquals(AllocationDecision.YES, allDecisions.get(shard).getAllocationDecision()); + } + } + + private RoutingAllocation routingAllocationWithOnePrimary( + AllocationDeciders deciders, + UnassignedInfo.Reason reason, + String... activeAllocationIds + ) { + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder(shardId.getIndexName()) + .settings(settings(Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(1) + .putInSyncAllocationIds(shardId.id(), Sets.newHashSet(activeAllocationIds)) + ) + .build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + switch (reason) { + + case INDEX_CREATED: + routingTableBuilder.addAsNew(metadata.index(shardId.getIndex())); + break; + case CLUSTER_RECOVERED: + routingTableBuilder.addAsRecovery(metadata.index(shardId.getIndex())); + break; + case INDEX_REOPENED: + routingTableBuilder.addAsFromCloseToOpen(metadata.index(shardId.getIndex())); + break; + default: + throw new IllegalArgumentException("can't do " + reason + " for you. teach me"); + } + ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTableBuilder.build()) + .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) + .build(); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, null, System.nanoTime()); + } + + private RoutingAllocation routingAllocationWithMultiplePrimaries( + AllocationDeciders deciders, + UnassignedInfo.Reason reason, + int numberOfShards, + int replicas, + String... activeAllocationIds + ) { + Iterator shardIterator = shardsInBatch.iterator(); + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder(shardId.getIndexName()) + .settings(settings(Version.CURRENT)) + .numberOfShards(numberOfShards) + .numberOfReplicas(replicas) + .putInSyncAllocationIds(shardIterator.next().id(), Sets.newHashSet(activeAllocationIds[0])) + .putInSyncAllocationIds(shardIterator.next().id(), Sets.newHashSet(activeAllocationIds[1])) + ) + .build(); + + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + for (ShardId shardIdFromBatch : shardsInBatch) { + switch (reason) { + case INDEX_CREATED: + routingTableBuilder.addAsNew(metadata.index(shardIdFromBatch.getIndex())); + break; + case CLUSTER_RECOVERED: + routingTableBuilder.addAsRecovery(metadata.index(shardIdFromBatch.getIndex())); + break; + case INDEX_REOPENED: + routingTableBuilder.addAsFromCloseToOpen(metadata.index(shardIdFromBatch.getIndex())); + break; + default: + throw new IllegalArgumentException("can't do " + reason + " for you. teach me"); + } + } + ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTableBuilder.build()) + .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) + .build(); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, null, System.nanoTime()); + } + + class TestBatchAllocator extends PrimaryShardBatchAllocator { + + private Map data; + + public TestBatchAllocator clear() { + data = null; + return this; + } + + public TestBatchAllocator addData( + DiscoveryNode node, + String allocationId, + boolean primary, + ReplicationCheckpoint replicationCheckpoint + ) { + return addData(node, allocationId, primary, replicationCheckpoint, null); + } + + public TestBatchAllocator addData(DiscoveryNode node, String allocationId, boolean primary) { + Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", nodeSettings); + return addData( + node, + allocationId, + primary, + ReplicationCheckpoint.empty(shardId, new CodecService(null, indexSettings, null).codec("default").getName()), + null + ); + } + + public TestBatchAllocator addData(DiscoveryNode node, String allocationId, boolean primary, @Nullable Exception storeException) { + Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", nodeSettings); + return addData( + node, + allocationId, + primary, + ReplicationCheckpoint.empty(shardId, new CodecService(null, indexSettings, null).codec("default").getName()), + storeException + ); + } + + public TestBatchAllocator addData( + DiscoveryNode node, + String allocationId, + boolean primary, + ReplicationCheckpoint replicationCheckpoint, + @Nullable Exception storeException + ) { + if (data == null) { + data = new HashMap<>(); + } + Map shardData = Map.of( + shardId, + new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards( + allocationId, + primary, + replicationCheckpoint, + storeException + ) + ); + data.put(node, new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch(node, shardData)); + return this; + } + + public TestBatchAllocator addShardData( + DiscoveryNode node, + String allocationId, + ShardId shardId, + boolean primary, + ReplicationCheckpoint replicationCheckpoint, + @Nullable Exception storeException + ) { + if (data == null) { + data = new HashMap<>(); + } + Map shardData = new HashMap<>(); + shardData.put( + shardId, + new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards( + allocationId, + primary, + replicationCheckpoint, + storeException + ) + ); + if (data.get(node) != null) shardData.putAll(data.get(node).getNodeGatewayStartedShardsBatch()); + data.put(node, new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch(node, shardData)); + return this; + } + + @Override + protected AsyncShardFetch.FetchResult fetchData( + Set shardsEligibleForFetch, + Set inEligibleShards, + RoutingAllocation allocation + ) { + return new AsyncShardFetch.FetchResult<>(data, Collections.>emptyMap()); + } + } +} From 1a6894006232fd50a41cf56458d0a36b965c3098 Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Mon, 4 Dec 2023 15:56:38 +0530 Subject: [PATCH 03/10] Throw exception for single shard calls Signed-off-by: Aman Khare --- .../gateway/PrimaryShardBatchAllocator.java | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 0dfd8d5718e54..4cb313e7c9927 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -9,6 +9,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.RoutingNode; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.UnassignedInfo.AllocationStatus; @@ -16,9 +17,10 @@ import org.opensearch.cluster.routing.allocation.NodeAllocationResult; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.gateway.AsyncShardFetch.FetchResult; -import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; -import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards; +import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShards; +import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShardsBatch; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -56,7 +58,8 @@ protected FetchResult makeAllocationDecision( ) { HashMap shardAllocationDecisions = new HashMap<>(); final boolean explain = allocation.debugDecision(); - Set shardsEligibleForFetch = new HashSet<>(); - Set shardsNotEligibleForFetch = new HashSet<>(); + Set eligibleShards = new HashSet<>(); + Set inEligibleShards = new HashSet<>(); // identify ineligible shards for (ShardRouting shard : shards) { AllocateUnassignedDecision decision = getInEligibleShardDecision(shard, allocation); if (decision != null) { - shardsNotEligibleForFetch.add(shard); + inEligibleShards.add(shard); shardAllocationDecisions.put(shard, decision); } else { - shardsEligibleForFetch.add(shard); + eligibleShards.add(shard); } } // Do not call fetchData if there are no eligible shards - if (shardsEligibleForFetch.size() == 0) { + if (eligibleShards.isEmpty()) { return shardAllocationDecisions; } // only fetch data for eligible shards final FetchResult shardsState = fetchData( - shardsEligibleForFetch, - shardsNotEligibleForFetch, + eligibleShards, + inEligibleShards, allocation ); + // Note : shardsState contain the Data, there key is DiscoveryNode but value is Map so to get one shard level data (from all the nodes), we'll traverse the map + // and construct the nodeShardState along the way before making any allocation decision. As metadata for a + // particular shard is needed from all the discovery nodes. // process the received data - for (ShardRouting unassignedShard : shardsEligibleForFetch) { + for (ShardRouting unassignedShard : eligibleShards) { if (shardsState.hasData() == false) { // if fetching is not done, add that no decision in the resultant map allocation.setHasPendingAsyncFetch(); @@ -119,7 +126,7 @@ public HashMap makeAllocationDecision( ); } else { - NodeShardStates nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); + List nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); // get allocation decision for this shard shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger)); } @@ -137,27 +144,26 @@ public HashMap makeAllocationDecision( * @param shardsState fetch data result for the whole batch * @return shard state returned from each node */ - private static NodeShardStates adaptToNodeShardStates( + private static List adaptToNodeShardStates( ShardRouting unassignedShard, FetchResult shardsState ) { - NodeShardStates nodeShardStates = new NodeShardStates(); + List nodeShardStates = new ArrayList<>(); Map nodeResponses = shardsState.getData(); // build data for a shard from all the nodes nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { NodeGatewayStartedShards shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() .get(unassignedShard.shardId()); - nodeShardStates.getNodeShardStates() - .add( - new NodeShardState( - node, - shardData.allocationId(), - shardData.primary(), - shardData.replicationCheckpoint(), - shardData.storeException() - ) - ); + nodeShardStates.add( + new NodeShardState( + node, + shardData.allocationId(), + shardData.primary(), + shardData.replicationCheckpoint(), + shardData.storeException() + ) + ); }); return nodeShardStates; } From dc7eb43e37ecc91e1cf0b797b8bc98e9817bd8b8 Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Wed, 13 Dec 2023 15:26:30 +0530 Subject: [PATCH 04/10] Modify according to transport PRs. Signed-off-by: Aman Khare --- .../gateway/PrimaryShardBatchAllocator.java | 62 +++++++------------ 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 4cb313e7c9927..6a13df1b688ba 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -5,19 +5,17 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ + package org.opensearch.gateway; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.routing.RoutingNode; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.ShardRouting; -import org.opensearch.cluster.routing.UnassignedInfo.AllocationStatus; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; -import org.opensearch.cluster.routing.allocation.NodeAllocationResult; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.gateway.AsyncShardFetch.FetchResult; -import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShards; +import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShard; import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShardsBatch; import java.util.ArrayList; @@ -64,12 +62,11 @@ protected FetchResult(Collections.singletonList(unassignedShard)), allocation, logger).get(unassignedShard); } /** - * Build allocation decisions for all the shards given to this allocator. + * Build allocation decisions for all the shards present in the batch identified by batchId. * * @param shards set of shards given for allocation * @param allocation current allocation of all the shards @@ -83,7 +80,6 @@ public HashMap makeAllocationDecision( Logger logger ) { HashMap shardAllocationDecisions = new HashMap<>(); - final boolean explain = allocation.debugDecision(); Set eligibleShards = new HashSet<>(); Set inEligibleShards = new HashSet<>(); // identify ineligible shards @@ -101,42 +97,27 @@ public HashMap makeAllocationDecision( return shardAllocationDecisions; } // only fetch data for eligible shards - final FetchResult shardsState = fetchData( - eligibleShards, - inEligibleShards, - allocation - ); - // Note : shardsState contain the Data, there key is DiscoveryNode but value is Map so to get one shard level data (from all the nodes), we'll traverse the map - // and construct the nodeShardState along the way before making any allocation decision. As metadata for a - // particular shard is needed from all the discovery nodes. + final FetchResult shardsState = fetchData(eligibleShards, inEligibleShards, allocation); // process the received data for (ShardRouting unassignedShard : eligibleShards) { - if (shardsState.hasData() == false) { - // if fetching is not done, add that no decision in the resultant map - allocation.setHasPendingAsyncFetch(); - List nodeDecisions = null; - if (explain) { - nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation); - } - shardAllocationDecisions.put( - unassignedShard, - AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions) - ); - } else { - - List nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); - // get allocation decision for this shard - shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger)); - } + List nodeShardStates = adaptToNodeShardStates( + unassignedShard, + shardsState + ); + // get allocation decision for this shard + shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger)); } return shardAllocationDecisions; } /** - * shardsState contain the Data, there key is DiscoveryNode but value is Map so to get one shard level data (from all the nodes), we'll traverse the map + * Transforms {@link FetchResult} of {@link NodeGatewayStartedShardsBatch} to {@link List} of {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards}. + *

+ * Returns null if {@link FetchResult} does not have any data. + *

+ * shardsState contain the Data, there key is DiscoveryNode but value is Map of ShardId + * and NodeGatewayStartedShardsBatch so to get one shard level data (from all the nodes), we'll traverse the map * and construct the nodeShardState along the way before making any allocation decision. As metadata for a * particular shard is needed from all the discovery nodes. * @@ -144,19 +125,22 @@ public HashMap makeAllocationDecision( * @param shardsState fetch data result for the whole batch * @return shard state returned from each node */ - private static List adaptToNodeShardStates( + private static List adaptToNodeShardStates( ShardRouting unassignedShard, FetchResult shardsState ) { - List nodeShardStates = new ArrayList<>(); + if (!shardsState.hasData()) { + return null; + } + List nodeShardStates = new ArrayList<>(); Map nodeResponses = shardsState.getData(); // build data for a shard from all the nodes nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { - NodeGatewayStartedShards shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() + NodeGatewayStartedShard shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() .get(unassignedShard.shardId()); nodeShardStates.add( - new NodeShardState( + new TransportNodesListGatewayStartedShards.NodeGatewayStartedShards( node, shardData.allocationId(), shardData.primary(), From e0c49437e89f02a8c5bb2126d7b3903682d1dfcc Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Tue, 27 Feb 2024 13:41:38 +0530 Subject: [PATCH 05/10] Use List instead of Set Signed-off-by: Aman Khare --- .../gateway/PrimaryShardBatchAllocator.java | 18 ++++----- .../PrimaryShardBatchAllocatorTests.java | 37 ++++++++++--------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 6a13df1b688ba..d073482405ec2 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -15,16 +15,14 @@ import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.gateway.AsyncShardFetch.FetchResult; -import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShard; -import org.opensearch.gateway.TransportNodesListGatewayStartedBatchShards.NodeGatewayStartedShardsBatch; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; /** * PrimaryShardBatchAllocator is similar to {@link org.opensearch.gateway.PrimaryShardAllocator} only difference is @@ -47,8 +45,8 @@ public abstract class PrimaryShardBatchAllocator extends PrimaryShardAllocator { abstract protected FetchResult fetchData( - Set shardsEligibleForFetch, - Set inEligibleShards, + List eligibleShards, + List inEligibleShards, RoutingAllocation allocation ); @@ -62,7 +60,7 @@ protected FetchResult(Collections.singletonList(unassignedShard)), allocation, logger).get(unassignedShard); + return makeAllocationDecision(Collections.singletonList(unassignedShard), allocation, logger).get(unassignedShard); } /** @@ -75,13 +73,13 @@ public AllocateUnassignedDecision makeAllocationDecision(ShardRouting unassigned */ @Override public HashMap makeAllocationDecision( - Set shards, + List shards, RoutingAllocation allocation, Logger logger ) { HashMap shardAllocationDecisions = new HashMap<>(); - Set eligibleShards = new HashSet<>(); - Set inEligibleShards = new HashSet<>(); + List eligibleShards = new ArrayList<>(); + List inEligibleShards = new ArrayList<>(); // identify ineligible shards for (ShardRouting shard : shards) { AllocateUnassignedDecision decision = getInEligibleShardDecision(shard, allocation); diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java index b8703192f09b3..a95328b9ddc39 100644 --- a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java @@ -8,7 +8,6 @@ package org.opensearch.gateway; import org.apache.lucene.codecs.Codec; -import org.junit.Before; import org.opensearch.Version; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; @@ -34,7 +33,9 @@ import org.opensearch.index.codec.CodecService; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.test.IndexSettingsModule; +import org.junit.Before; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -76,7 +77,7 @@ private void allocateAllUnassigned(final RoutingAllocation allocation) { private void allocateAllUnassignedBatch(final RoutingAllocation allocation) { final RoutingNodes.UnassignedShards.UnassignedIterator iterator = allocation.routingNodes().unassigned().iterator(); - Set shardsToBatch = new HashSet<>(); + List shardsToBatch = new ArrayList<>(); while (iterator.hasNext()) { shardsToBatch.add(iterator.next()); } @@ -86,14 +87,14 @@ private void allocateAllUnassignedBatch(final RoutingAllocation allocation) { public void testMakeAllocationDecisionDataFetching() { final RoutingAllocation allocation = routingAllocationWithOnePrimary(noAllocationDeciders(), CLUSTER_RECOVERED, "allocId1"); - Set shards = new HashSet<>(); + List shards = new ArrayList<>(); allocateAllUnassignedBatch(allocation); ShardRouting shard = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard(); shards.add(shard); HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); // verify we get decisions for all the shards assertEquals(shards.size(), allDecisions.size()); - assertEquals(shards, allDecisions.keySet()); + assertEquals(shards, new ArrayList<>(allDecisions.keySet())); assertEquals(AllocationDecision.AWAITING_INFO, allDecisions.get(shard).getAllocationDecision()); } @@ -101,25 +102,25 @@ public void testMakeAllocationDecisionForReplicaShard() { final RoutingAllocation allocation = routingAllocationWithOnePrimary(noAllocationDeciders(), CLUSTER_RECOVERED, "allocId1"); List replicaShards = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).replicaShards(); - Set shards = new HashSet<>(replicaShards); + List shards = new ArrayList<>(replicaShards); HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); // verify we get decisions for all the shards assertEquals(shards.size(), allDecisions.size()); - assertEquals(shards, allDecisions.keySet()); - assertEquals(false, allDecisions.get(replicaShards.get(0)).isDecisionTaken()); + assertEquals(shards, new ArrayList<>(allDecisions.keySet())); + assertFalse(allDecisions.get(replicaShards.get(0)).isDecisionTaken()); } public void testMakeAllocationDecisionDataFetched() { final RoutingAllocation allocation = routingAllocationWithOnePrimary(noAllocationDeciders(), CLUSTER_RECOVERED, "allocId1"); - Set shards = new HashSet<>(); + List shards = new ArrayList<>(); ShardRouting shard = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard(); shards.add(shard); batchAllocator.addData(node1, "allocId1", true, new ReplicationCheckpoint(shardId, 20, 101, 1, Codec.getDefault().getName())); HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); // verify we get decisions for all the shards assertEquals(shards.size(), allDecisions.size()); - assertEquals(shards, allDecisions.keySet()); + assertEquals(shards, new ArrayList<>(allDecisions.keySet())); assertEquals(AllocationDecision.YES, allDecisions.get(shard).getAllocationDecision()); } @@ -133,7 +134,7 @@ public void testMakeAllocationDecisionDataFetchedMultipleShards() { "allocId-0", "allocId-1" ); - Set shards = new HashSet<>(); + List shards = new ArrayList<>(); for (ShardId shardId : shardsInBatch) { ShardRouting shard = allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard(); allocation.routingTable().getIndicesRouting().get("test").shard(shardId.id()).primaryShard().recoverySource(); @@ -150,7 +151,7 @@ public void testMakeAllocationDecisionDataFetchedMultipleShards() { HashMap allDecisions = batchAllocator.makeAllocationDecision(shards, allocation, logger); // verify we get decisions for all the shards assertEquals(shards.size(), allDecisions.size()); - assertEquals(shards, allDecisions.keySet()); + assertEquals(new HashSet<>(shards), allDecisions.keySet()); for (ShardRouting shard : shards) { assertEquals(AllocationDecision.YES, allDecisions.get(shard).getAllocationDecision()); } @@ -185,7 +186,7 @@ private RoutingAllocation routingAllocationWithOnePrimary( default: throw new IllegalArgumentException("can't do " + reason + " for you. teach me"); } - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) .metadata(metadata) .routingTable(routingTableBuilder.build()) .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) @@ -288,9 +289,9 @@ public TestBatchAllocator addData( if (data == null) { data = new HashMap<>(); } - Map shardData = Map.of( + Map shardData = Map.of( shardId, - new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards( + new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard( allocationId, primary, replicationCheckpoint, @@ -312,10 +313,10 @@ public TestBatchAllocator addShardData( if (data == null) { data = new HashMap<>(); } - Map shardData = new HashMap<>(); + Map shardData = new HashMap<>(); shardData.put( shardId, - new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShards( + new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard( allocationId, primary, replicationCheckpoint, @@ -329,8 +330,8 @@ public TestBatchAllocator addShardData( @Override protected AsyncShardFetch.FetchResult fetchData( - Set shardsEligibleForFetch, - Set inEligibleShards, + List shardsEligibleForFetch, + List inEligibleShards, RoutingAllocation allocation ) { return new AsyncShardFetch.FetchResult<>(data, Collections.>emptyMap()); From aa3f82dfdd6a8e8564a15a067ce11fe8a0297001 Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Fri, 15 Mar 2024 16:27:47 +0530 Subject: [PATCH 06/10] Move GatewayShardStarted in helper class to avoid dependency on TransportNodesListGatewayStartedShards Signed-off-by: Aman Khare --- .../gateway/RecoveryFromGatewayIT.java | 25 +-- .../gateway/PrimaryShardAllocator.java | 65 ++++---- .../gateway/PrimaryShardBatchAllocator.java | 18 +-- ...ansportNodesGatewayStartedShardHelper.java | 149 +++++++++++++++++- ...ransportNodesListGatewayStartedShards.java | 3 +- ...ortNodesListGatewayStartedShardsBatch.java | 132 +--------------- .../PrimaryShardBatchAllocatorTests.java | 8 +- 7 files changed, 216 insertions(+), 184 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index 6c248a32c9928..3128b55c17029 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -56,6 +56,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.env.NodeEnvironment; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.MergePolicyProvider; @@ -764,11 +765,11 @@ public void testSingleShardFetchUsingBatchAction() { ); final Index index = resolveIndex(indexName); final ShardId shardId = new ShardId(index, 0); - TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard nodeGatewayStartedShards = response.getNodesMap() + GatewayShardStarted gatewayShardStarted = response.getNodesMap() .get(searchShardsResponse.getNodes()[0].getId()) .getNodeGatewayStartedShardsBatch() .get(shardId); - assertNodeGatewayStartedShardsHappyCase(nodeGatewayStartedShards); + assertNodeGatewayStartedShardsHappyCase(gatewayShardStarted); } public void testShardFetchMultiNodeMultiIndexesUsingBatchAction() { @@ -792,11 +793,11 @@ public void testShardFetchMultiNodeMultiIndexesUsingBatchAction() { ShardId shardId = clusterSearchShardsGroup.getShardId(); assertEquals(1, clusterSearchShardsGroup.getShards().length); String nodeId = clusterSearchShardsGroup.getShards()[0].currentNodeId(); - TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard nodeGatewayStartedShards = response.getNodesMap() + GatewayShardStarted gatewayShardStarted = response.getNodesMap() .get(nodeId) .getNodeGatewayStartedShardsBatch() .get(shardId); - assertNodeGatewayStartedShardsHappyCase(nodeGatewayStartedShards); + assertNodeGatewayStartedShardsHappyCase(gatewayShardStarted); } } @@ -816,13 +817,13 @@ public void testShardFetchCorruptedShardsUsingBatchAction() throws Exception { new TransportNodesListGatewayStartedShardsBatch.Request(getDiscoveryNodes(), shardIdShardAttributesMap) ); DiscoveryNode[] discoveryNodes = getDiscoveryNodes(); - TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard nodeGatewayStartedShards = response.getNodesMap() + GatewayShardStarted gatewayShardStarted = response.getNodesMap() .get(discoveryNodes[0].getId()) .getNodeGatewayStartedShardsBatch() .get(shardId); - assertNotNull(nodeGatewayStartedShards.storeException()); - assertNotNull(nodeGatewayStartedShards.allocationId()); - assertTrue(nodeGatewayStartedShards.primary()); + assertNotNull(gatewayShardStarted.storeException()); + assertNotNull(gatewayShardStarted.allocationId()); + assertTrue(gatewayShardStarted.primary()); } public void testSingleShardStoreFetchUsingBatchAction() throws ExecutionException, InterruptedException { @@ -951,11 +952,11 @@ private void assertNodeStoreFilesMetadataSuccessCase( } private void assertNodeGatewayStartedShardsHappyCase( - TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard nodeGatewayStartedShards + GatewayShardStarted gatewayShardStarted ) { - assertNull(nodeGatewayStartedShards.storeException()); - assertNotNull(nodeGatewayStartedShards.allocationId()); - assertTrue(nodeGatewayStartedShards.primary()); + assertNull(gatewayShardStarted.storeException()); + assertNotNull(gatewayShardStarted.allocationId()); + assertTrue(gatewayShardStarted.primary()); } private void prepareIndex(String indexName, int numberOfPrimaryShards) { diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java index 5046873830c01..f76c82478155a 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java @@ -50,6 +50,7 @@ import org.opensearch.cluster.routing.allocation.decider.Decision.Type; import org.opensearch.env.ShardLockObtainFailedException; import org.opensearch.gateway.AsyncShardFetch.FetchResult; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayShardStarted; import org.opensearch.gateway.TransportNodesListGatewayStartedShards.NodeGatewayStartedShards; import java.util.ArrayList; @@ -125,27 +126,33 @@ public AllocateUnassignedDecision makeAllocationDecision( return decision; } final FetchResult shardState = fetchData(unassignedShard, allocation); - List nodeShardStates = adaptToNodeStartedShardList(shardState); + List nodeShardStates = adaptToNodeStartedShardList(shardState); return getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger); } /** - * Transforms {@link FetchResult} of {@link NodeGatewayStartedShards} to {@link List} of {@link NodeGatewayStartedShards} + * Transforms {@link FetchResult} of {@link NodeGatewayStartedShards} to {@link List} of {@link NodeGatewayShardStarted} * Returns null if {@link FetchResult} does not have any data. */ - private static List adaptToNodeStartedShardList(FetchResult shardsState) { + private static List adaptToNodeStartedShardList(FetchResult shardsState) { if (!shardsState.hasData()) { return null; } - List nodeShardStates = new ArrayList<>(); - shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { nodeShardStates.add(nodeGatewayStartedShard); }); + List nodeShardStates = new ArrayList<>(); + shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { nodeShardStates.add(new NodeGatewayShardStarted( + nodeGatewayStartedShard.allocationId(), + nodeGatewayStartedShard.primary(), + nodeGatewayStartedShard.replicationCheckpoint(), + nodeGatewayStartedShard.storeException(), + node + )); }); return nodeShardStates; } protected AllocateUnassignedDecision getAllocationDecision( ShardRouting unassignedShard, RoutingAllocation allocation, - List shardState, + List shardState, Logger logger ) { final boolean explain = allocation.debugDecision(); @@ -236,7 +243,7 @@ protected AllocateUnassignedDecision getAllocationDecision( nodesToAllocate = buildNodesToAllocate(allocation, nodeShardsResult.orderedAllocationCandidates, unassignedShard, true); if (nodesToAllocate.yesNodeShards.isEmpty() == false) { final DecidedNode decidedNode = nodesToAllocate.yesNodeShards.get(0); - final NodeGatewayStartedShards nodeShardState = decidedNode.nodeShardState; + final NodeGatewayShardStarted nodeShardState = decidedNode.nodeShardState; logger.debug( "[{}][{}]: allocating [{}] to [{}] on forced primary allocation", unassignedShard.index(), @@ -296,11 +303,11 @@ protected AllocateUnassignedDecision getAllocationDecision( */ private static List buildNodeDecisions( NodesToAllocate nodesToAllocate, - List fetchedShardData, + List fetchedShardData, Set inSyncAllocationIds ) { List nodeResults = new ArrayList<>(); - Collection ineligibleShards = new ArrayList<>(); + Collection ineligibleShards = new ArrayList<>(); if (nodesToAllocate != null) { final Set discoNodes = new HashSet<>(); nodeResults.addAll( @@ -334,21 +341,21 @@ private static List buildNodeDecisions( return nodeResults; } - private static ShardStoreInfo shardStoreInfo(NodeGatewayStartedShards nodeShardState, Set inSyncAllocationIds) { + private static ShardStoreInfo shardStoreInfo(NodeGatewayShardStarted nodeShardState, Set inSyncAllocationIds) { final Exception storeErr = nodeShardState.storeException(); final boolean inSync = nodeShardState.allocationId() != null && inSyncAllocationIds.contains(nodeShardState.allocationId()); return new ShardStoreInfo(nodeShardState.allocationId(), inSync, storeErr); } - private static final Comparator NO_STORE_EXCEPTION_FIRST_COMPARATOR = Comparator.comparing( - (NodeGatewayStartedShards state) -> state.storeException() == null + private static final Comparator NO_STORE_EXCEPTION_FIRST_COMPARATOR = Comparator.comparing( + (NodeGatewayShardStarted state) -> state.storeException() == null ).reversed(); - private static final Comparator PRIMARY_FIRST_COMPARATOR = Comparator.comparing( - NodeGatewayStartedShards::primary + private static final Comparator PRIMARY_FIRST_COMPARATOR = Comparator.comparing( + NodeGatewayShardStarted::primary ).reversed(); - private static final Comparator HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR = Comparator.comparing( - NodeGatewayStartedShards::replicationCheckpoint, + private static final Comparator HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR = Comparator.comparing( + NodeGatewayShardStarted::replicationCheckpoint, Comparator.nullsLast(Comparator.naturalOrder()) ); @@ -362,12 +369,12 @@ protected static NodeShardsResult buildNodeShardsResult( boolean matchAnyShard, Set ignoreNodes, Set inSyncAllocationIds, - List shardState, + List shardState, Logger logger ) { - List nodeShardStates = new ArrayList<>(); + List nodeShardStates = new ArrayList<>(); int numberOfAllocationsFound = 0; - for (NodeGatewayStartedShards nodeShardState : shardState) { + for (NodeGatewayShardStarted nodeShardState : shardState) { DiscoveryNode node = nodeShardState.getNode(); String allocationId = nodeShardState.allocationId(); @@ -432,7 +439,7 @@ protected static NodeShardsResult buildNodeShardsResult( return new NodeShardsResult(nodeShardStates, numberOfAllocationsFound); } - private static Comparator createActiveShardComparator( + private static Comparator createActiveShardComparator( boolean matchAnyShard, Set inSyncAllocationIds ) { @@ -442,11 +449,11 @@ private static Comparator createActiveShardComparator( * 2. Prefer previous primary shard * 3. Prefer shard copy with the highest replication checkpoint. It is NO-OP for doc rep enabled indices. */ - final Comparator comparator; // allocation preference + final Comparator comparator; // allocation preference if (matchAnyShard) { // prefer shards with matching allocation ids - Comparator matchingAllocationsFirst = Comparator.comparing( - (NodeGatewayStartedShards state) -> inSyncAllocationIds.contains(state.allocationId()) + Comparator matchingAllocationsFirst = Comparator.comparing( + (NodeGatewayShardStarted state) -> inSyncAllocationIds.contains(state.allocationId()) ).reversed(); comparator = matchingAllocationsFirst.thenComparing(NO_STORE_EXCEPTION_FIRST_COMPARATOR) .thenComparing(PRIMARY_FIRST_COMPARATOR) @@ -464,14 +471,14 @@ private static Comparator createActiveShardComparator( */ private static NodesToAllocate buildNodesToAllocate( RoutingAllocation allocation, - List nodeShardStates, + List nodeShardStates, ShardRouting shardRouting, boolean forceAllocate ) { List yesNodeShards = new ArrayList<>(); List throttledNodeShards = new ArrayList<>(); List noNodeShards = new ArrayList<>(); - for (NodeGatewayStartedShards nodeShardState : nodeShardStates) { + for (NodeGatewayShardStarted nodeShardState : nodeShardStates) { RoutingNode node = allocation.routingNodes().node(nodeShardState.getNode().getId()); if (node == null) { continue; @@ -502,10 +509,10 @@ private static NodesToAllocate buildNodesToAllocate( * This class encapsulates the result of a call to {@link #buildNodeShardsResult} */ static class NodeShardsResult { - final List orderedAllocationCandidates; + final List orderedAllocationCandidates; final int allocationsFound; - NodeShardsResult(List orderedAllocationCandidates, int allocationsFound) { + NodeShardsResult(List orderedAllocationCandidates, int allocationsFound) { this.orderedAllocationCandidates = orderedAllocationCandidates; this.allocationsFound = allocationsFound; } @@ -531,10 +538,10 @@ protected static class NodesToAllocate { * by the allocator for allocating to the node that holds the shard copy. */ private static class DecidedNode { - final NodeGatewayStartedShards nodeShardState; + final NodeGatewayShardStarted nodeShardState; final Decision decision; - private DecidedNode(NodeGatewayStartedShards nodeShardState, Decision decision) { + private DecidedNode(NodeGatewayShardStarted nodeShardState, Decision decision) { this.nodeShardState = nodeShardState; this.decision = decision; } diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index d073482405ec2..3f04ac4d2f575 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -15,7 +15,7 @@ import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.gateway.AsyncShardFetch.FetchResult; -import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayShardStarted; import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; import java.util.ArrayList; @@ -54,7 +54,7 @@ protected FetchResult makeAllocationDecision( // process the received data for (ShardRouting unassignedShard : eligibleShards) { - List nodeShardStates = adaptToNodeShardStates( + List nodeShardStates = adaptToNodeShardStates( unassignedShard, shardsState ); @@ -123,27 +123,27 @@ public HashMap makeAllocationDecision( * @param shardsState fetch data result for the whole batch * @return shard state returned from each node */ - private static List adaptToNodeShardStates( + private static List adaptToNodeShardStates( ShardRouting unassignedShard, FetchResult shardsState ) { if (!shardsState.hasData()) { return null; } - List nodeShardStates = new ArrayList<>(); + List nodeShardStates = new ArrayList<>(); Map nodeResponses = shardsState.getData(); // build data for a shard from all the nodes nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { - NodeGatewayStartedShard shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() + TransportNodesGatewayStartedShardHelper.GatewayShardStarted shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() .get(unassignedShard.shardId()); nodeShardStates.add( - new TransportNodesListGatewayStartedShards.NodeGatewayStartedShards( - node, + new NodeGatewayShardStarted( shardData.allocationId(), shardData.primary(), shardData.replicationCheckpoint(), - shardData.storeException() + shardData.storeException(), + node ) ); }); diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java index 403e3e96fa209..01b944d4390d1 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java @@ -12,8 +12,11 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchException; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; @@ -23,8 +26,10 @@ import org.opensearch.index.shard.ShardStateMetadata; import org.opensearch.index.store.Store; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import java.io.IOException; +import java.util.Objects; /** * This class has the common code used in {@link TransportNodesListGatewayStartedShards} and @@ -37,7 +42,7 @@ * @opensearch.internal */ public class TransportNodesGatewayStartedShardHelper { - public static TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard getShardInfoOnLocalNode( + public static GatewayShardStarted getShardInfoOnLocalNode( Logger logger, final ShardId shardId, NamedXContentRegistry namedXContentRegistry, @@ -90,7 +95,7 @@ public static TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShar exception ); String allocationId = shardStateMetadata.allocationId != null ? shardStateMetadata.allocationId.getId() : null; - return new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard( + return new GatewayShardStarted( allocationId, shardStateMetadata.primary, null, @@ -102,13 +107,149 @@ public static TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShar logger.debug("{} shard state info found: [{}]", shardId, shardStateMetadata); String allocationId = shardStateMetadata.allocationId != null ? shardStateMetadata.allocationId.getId() : null; final IndexShard shard = indicesService.getShardOrNull(shardId); - return new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard( + return new GatewayShardStarted( allocationId, shardStateMetadata.primary, shard != null ? shard.getLatestReplicationCheckpoint() : null ); } logger.trace("{} no local shard info found", shardId); - return new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard(null, false, null); + return new GatewayShardStarted(null, false, null); + } + + /** + * This class encapsulates the metadata about a started shard that needs to be persisted or sent between nodes. + * This is used in {@link TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch} to construct the response for each node, instead of + * {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards} because we don't need to save an extra + * {@link DiscoveryNode} object like in {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards} + * which reduces memory footprint of its objects. + * + * @opensearch.internal + */ + public static class GatewayShardStarted { + private final String allocationId; + private final boolean primary; + private final Exception storeException; + private final ReplicationCheckpoint replicationCheckpoint; + + public GatewayShardStarted(StreamInput in) throws IOException { + allocationId = in.readOptionalString(); + primary = in.readBoolean(); + if (in.readBoolean()) { + storeException = in.readException(); + } else { + storeException = null; + } + if (in.readBoolean()) { + replicationCheckpoint = new ReplicationCheckpoint(in); + } else { + replicationCheckpoint = null; + } + } + + public GatewayShardStarted(String allocationId, boolean primary, ReplicationCheckpoint replicationCheckpoint) { + this(allocationId, primary, replicationCheckpoint, null); + } + + public GatewayShardStarted( + String allocationId, + boolean primary, + ReplicationCheckpoint replicationCheckpoint, + Exception storeException + ) { + this.allocationId = allocationId; + this.primary = primary; + this.replicationCheckpoint = replicationCheckpoint; + this.storeException = storeException; + } + + public String allocationId() { + return this.allocationId; + } + + public boolean primary() { + return this.primary; + } + + public ReplicationCheckpoint replicationCheckpoint() { + return this.replicationCheckpoint; + } + + public Exception storeException() { + return this.storeException; + } + + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalString(allocationId); + out.writeBoolean(primary); + if (storeException != null) { + out.writeBoolean(true); + out.writeException(storeException); + } else { + out.writeBoolean(false); + } + if (replicationCheckpoint != null) { + out.writeBoolean(true); + replicationCheckpoint.writeTo(out); + } else { + out.writeBoolean(false); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + GatewayShardStarted that = (GatewayShardStarted) o; + + return primary == that.primary + && Objects.equals(allocationId, that.allocationId) + && Objects.equals(storeException, that.storeException) + && Objects.equals(replicationCheckpoint, that.replicationCheckpoint); + } + + @Override + public int hashCode() { + int result = (allocationId != null ? allocationId.hashCode() : 0); + result = 31 * result + (primary ? 1 : 0); + result = 31 * result + (storeException != null ? storeException.hashCode() : 0); + result = 31 * result + (replicationCheckpoint != null ? replicationCheckpoint.hashCode() : 0); + return result; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("NodeGatewayStartedShards[").append("allocationId=").append(allocationId).append(",primary=").append(primary); + if (storeException != null) { + buf.append(",storeException=").append(storeException); + } + if (replicationCheckpoint != null) { + buf.append(",ReplicationCheckpoint=").append(replicationCheckpoint.toString()); + } + buf.append("]"); + return buf.toString(); + } + } + + public static class NodeGatewayShardStarted extends GatewayShardStarted { + + private final DiscoveryNode node; + + public NodeGatewayShardStarted(String allocationId, boolean primary, + ReplicationCheckpoint replicationCheckpoint, Exception storeException, + DiscoveryNode node) { + super(allocationId, primary, replicationCheckpoint, storeException); + this.node = node; + } + + public DiscoveryNode getNode() { + return node; + } } } diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java index 0ba872aab9974..cb4470d0c04da 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java @@ -59,6 +59,7 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportService; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; import java.io.IOException; import java.util.List; @@ -154,7 +155,7 @@ protected NodesGatewayStartedShards newResponse( @Override protected NodeGatewayStartedShards nodeOperation(NodeRequest request) { try { - TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard shardInfo = getShardInfoOnLocalNode( + GatewayShardStarted shardInfo = getShardInfoOnLocalNode( logger, request.getShardId(), namedXContentRegistry, diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java index bc327c1b85748..c6aeca85f984b 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java @@ -28,7 +28,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; import org.opensearch.indices.IndicesService; -import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; import org.opensearch.indices.store.ShardAttributes; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportRequest; @@ -135,7 +135,7 @@ protected NodesGatewayStartedShardsBatch newResponse( */ @Override protected NodeGatewayStartedShardsBatch nodeOperation(NodeRequest request) { - Map shardsOnNode = new HashMap<>(); + Map shardsOnNode = new HashMap<>(); for (ShardAttributes shardAttr : request.shardAttributes.values()) { final ShardId shardId = shardAttr.getShardId(); try { @@ -155,7 +155,7 @@ protected NodeGatewayStartedShardsBatch nodeOperation(NodeRequest request) { } catch (Exception e) { shardsOnNode.put( shardId, - new NodeGatewayStartedShard(null, false, null, new OpenSearchException("failed to load started shards", e)) + new GatewayShardStarted(null, false, null, new OpenSearchException("failed to load started shards", e)) ); } } @@ -248,125 +248,7 @@ public void writeTo(StreamOutput out) throws IOException { } } - /** - * This class encapsulates the metadata about a started shard that needs to be persisted or sent between nodes. - * This is used in {@link NodeGatewayStartedShardsBatch} to construct the response for each node, instead of - * {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards} because we don't need to save an extra - * {@link DiscoveryNode} object like in {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards} - * which reduces memory footprint of its objects. - * - * @opensearch.internal - */ - public static class NodeGatewayStartedShard { - private final String allocationId; - private final boolean primary; - private final Exception storeException; - private final ReplicationCheckpoint replicationCheckpoint; - - public NodeGatewayStartedShard(StreamInput in) throws IOException { - allocationId = in.readOptionalString(); - primary = in.readBoolean(); - if (in.readBoolean()) { - storeException = in.readException(); - } else { - storeException = null; - } - if (in.readBoolean()) { - replicationCheckpoint = new ReplicationCheckpoint(in); - } else { - replicationCheckpoint = null; - } - } - - public NodeGatewayStartedShard(String allocationId, boolean primary, ReplicationCheckpoint replicationCheckpoint) { - this(allocationId, primary, replicationCheckpoint, null); - } - - public NodeGatewayStartedShard( - String allocationId, - boolean primary, - ReplicationCheckpoint replicationCheckpoint, - Exception storeException - ) { - this.allocationId = allocationId; - this.primary = primary; - this.replicationCheckpoint = replicationCheckpoint; - this.storeException = storeException; - } - - public String allocationId() { - return this.allocationId; - } - - public boolean primary() { - return this.primary; - } - - public ReplicationCheckpoint replicationCheckpoint() { - return this.replicationCheckpoint; - } - - public Exception storeException() { - return this.storeException; - } - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalString(allocationId); - out.writeBoolean(primary); - if (storeException != null) { - out.writeBoolean(true); - out.writeException(storeException); - } else { - out.writeBoolean(false); - } - if (replicationCheckpoint != null) { - out.writeBoolean(true); - replicationCheckpoint.writeTo(out); - } else { - out.writeBoolean(false); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - NodeGatewayStartedShard that = (NodeGatewayStartedShard) o; - - return primary == that.primary - && Objects.equals(allocationId, that.allocationId) - && Objects.equals(storeException, that.storeException) - && Objects.equals(replicationCheckpoint, that.replicationCheckpoint); - } - - @Override - public int hashCode() { - int result = (allocationId != null ? allocationId.hashCode() : 0); - result = 31 * result + (primary ? 1 : 0); - result = 31 * result + (storeException != null ? storeException.hashCode() : 0); - result = 31 * result + (replicationCheckpoint != null ? replicationCheckpoint.hashCode() : 0); - return result; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(); - buf.append("NodeGatewayStartedShards[").append("allocationId=").append(allocationId).append(",primary=").append(primary); - if (storeException != null) { - buf.append(",storeException=").append(storeException); - } - if (replicationCheckpoint != null) { - buf.append(",ReplicationCheckpoint=").append(replicationCheckpoint.toString()); - } - buf.append("]"); - return buf.toString(); - } - } /** * This is the response from a single node, this is used in {@link NodesGatewayStartedShardsBatch} for creating @@ -376,15 +258,15 @@ public String toString() { * @opensearch.internal */ public static class NodeGatewayStartedShardsBatch extends BaseNodeResponse { - private final Map nodeGatewayStartedShardsBatch; + private final Map nodeGatewayStartedShardsBatch; - public Map getNodeGatewayStartedShardsBatch() { + public Map getNodeGatewayStartedShardsBatch() { return nodeGatewayStartedShardsBatch; } public NodeGatewayStartedShardsBatch(StreamInput in) throws IOException { super(in); - this.nodeGatewayStartedShardsBatch = in.readMap(ShardId::new, NodeGatewayStartedShard::new); + this.nodeGatewayStartedShardsBatch = in.readMap(ShardId::new, GatewayShardStarted::new); } @Override @@ -393,7 +275,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeMap(nodeGatewayStartedShardsBatch, (o, k) -> k.writeTo(o), (o, v) -> v.writeTo(o)); } - public NodeGatewayStartedShardsBatch(DiscoveryNode node, Map nodeGatewayStartedShardsBatch) { + public NodeGatewayStartedShardsBatch(DiscoveryNode node, Map nodeGatewayStartedShardsBatch) { super(node); this.nodeGatewayStartedShardsBatch = nodeGatewayStartedShardsBatch; } diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java index a95328b9ddc39..0175961f6a274 100644 --- a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java @@ -289,9 +289,9 @@ public TestBatchAllocator addData( if (data == null) { data = new HashMap<>(); } - Map shardData = Map.of( + Map shardData = Map.of( shardId, - new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard( + new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( allocationId, primary, replicationCheckpoint, @@ -313,10 +313,10 @@ public TestBatchAllocator addShardData( if (data == null) { data = new HashMap<>(); } - Map shardData = new HashMap<>(); + Map shardData = new HashMap<>(); shardData.put( shardId, - new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShard( + new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( allocationId, primary, replicationCheckpoint, From e0cff1a2e52991392ee3d0c32e309a1bc39c07d2 Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Fri, 15 Mar 2024 16:37:47 +0530 Subject: [PATCH 07/10] spotless apply Signed-off-by: Aman Khare --- .../gateway/RecoveryFromGatewayIT.java | 9 ++------ .../gateway/PrimaryShardAllocator.java | 23 ++++++++++--------- .../gateway/PrimaryShardBatchAllocator.java | 8 +++---- ...ansportNodesGatewayStartedShardHelper.java | 17 +++++++------- ...ransportNodesListGatewayStartedShards.java | 2 +- ...ortNodesListGatewayStartedShardsBatch.java | 4 +--- 6 files changed, 27 insertions(+), 36 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index 3128b55c17029..adc233ab6d047 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -793,10 +793,7 @@ public void testShardFetchMultiNodeMultiIndexesUsingBatchAction() { ShardId shardId = clusterSearchShardsGroup.getShardId(); assertEquals(1, clusterSearchShardsGroup.getShards().length); String nodeId = clusterSearchShardsGroup.getShards()[0].currentNodeId(); - GatewayShardStarted gatewayShardStarted = response.getNodesMap() - .get(nodeId) - .getNodeGatewayStartedShardsBatch() - .get(shardId); + GatewayShardStarted gatewayShardStarted = response.getNodesMap().get(nodeId).getNodeGatewayStartedShardsBatch().get(shardId); assertNodeGatewayStartedShardsHappyCase(gatewayShardStarted); } } @@ -951,9 +948,7 @@ private void assertNodeStoreFilesMetadataSuccessCase( assertNotNull(storeFileMetadata.peerRecoveryRetentionLeases()); } - private void assertNodeGatewayStartedShardsHappyCase( - GatewayShardStarted gatewayShardStarted - ) { + private void assertNodeGatewayStartedShardsHappyCase(GatewayShardStarted gatewayShardStarted) { assertNull(gatewayShardStarted.storeException()); assertNotNull(gatewayShardStarted.allocationId()); assertTrue(gatewayShardStarted.primary()); diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java index f76c82478155a..e16e84c95b4b2 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java @@ -139,13 +139,17 @@ private static List adaptToNodeStartedShardList(FetchRe return null; } List nodeShardStates = new ArrayList<>(); - shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { nodeShardStates.add(new NodeGatewayShardStarted( - nodeGatewayStartedShard.allocationId(), - nodeGatewayStartedShard.primary(), - nodeGatewayStartedShard.replicationCheckpoint(), - nodeGatewayStartedShard.storeException(), - node - )); }); + shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { + nodeShardStates.add( + new NodeGatewayShardStarted( + nodeGatewayStartedShard.allocationId(), + nodeGatewayStartedShard.primary(), + nodeGatewayStartedShard.replicationCheckpoint(), + nodeGatewayStartedShard.storeException(), + node + ) + ); + }); return nodeShardStates; } @@ -439,10 +443,7 @@ protected static NodeShardsResult buildNodeShardsResult( return new NodeShardsResult(nodeShardStates, numberOfAllocationsFound); } - private static Comparator createActiveShardComparator( - boolean matchAnyShard, - Set inSyncAllocationIds - ) { + private static Comparator createActiveShardComparator(boolean matchAnyShard, Set inSyncAllocationIds) { /** * Orders the active shards copies based on below comparators * 1. No store exception i.e. shard copy is readable diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 3f04ac4d2f575..08ce4ab9b3f39 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -99,10 +99,7 @@ public HashMap makeAllocationDecision( // process the received data for (ShardRouting unassignedShard : eligibleShards) { - List nodeShardStates = adaptToNodeShardStates( - unassignedShard, - shardsState - ); + List nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); // get allocation decision for this shard shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger)); } @@ -135,7 +132,8 @@ private static List adaptToNodeShardStates( // build data for a shard from all the nodes nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { - TransportNodesGatewayStartedShardHelper.GatewayShardStarted shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch() + TransportNodesGatewayStartedShardHelper.GatewayShardStarted shardData = nodeGatewayStartedShardsBatch + .getNodeGatewayStartedShardsBatch() .get(unassignedShard.shardId()); nodeShardStates.add( new NodeGatewayShardStarted( diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java index 01b944d4390d1..5168cac6f920b 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java @@ -95,12 +95,7 @@ public static GatewayShardStarted getShardInfoOnLocalNode( exception ); String allocationId = shardStateMetadata.allocationId != null ? shardStateMetadata.allocationId.getId() : null; - return new GatewayShardStarted( - allocationId, - shardStateMetadata.primary, - null, - exception - ); + return new GatewayShardStarted(allocationId, shardStateMetadata.primary, null, exception); } } @@ -241,9 +236,13 @@ public static class NodeGatewayShardStarted extends GatewayShardStarted { private final DiscoveryNode node; - public NodeGatewayShardStarted(String allocationId, boolean primary, - ReplicationCheckpoint replicationCheckpoint, Exception storeException, - DiscoveryNode node) { + public NodeGatewayShardStarted( + String allocationId, + boolean primary, + ReplicationCheckpoint replicationCheckpoint, + Exception storeException, + DiscoveryNode node + ) { super(allocationId, primary, replicationCheckpoint, storeException); this.node = node; } diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java index cb4470d0c04da..f81e6bb46bb64 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java @@ -53,13 +53,13 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; import org.opensearch.indices.IndicesService; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.store.ShardAttributes; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportService; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; import java.io.IOException; import java.util.List; diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java index c6aeca85f984b..d8e83b955d46d 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java @@ -27,8 +27,8 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; -import org.opensearch.indices.IndicesService; import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; +import org.opensearch.indices.IndicesService; import org.opensearch.indices.store.ShardAttributes; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportRequest; @@ -248,8 +248,6 @@ public void writeTo(StreamOutput out) throws IOException { } } - - /** * This is the response from a single node, this is used in {@link NodesGatewayStartedShardsBatch} for creating * node to its response mapping for this transport request. From 6112f4b14c2e3175fdcdd15afa2d86be8ad64be6 Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Fri, 15 Mar 2024 16:49:15 +0530 Subject: [PATCH 08/10] Add java doc on new class Signed-off-by: Aman Khare --- .../gateway/TransportNodesGatewayStartedShardHelper.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java index 5168cac6f920b..d08291de2eac5 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java @@ -232,6 +232,14 @@ public String toString() { } } + /** + * This class extends the {@link GatewayShardStarted} which contains all necessary shard metadata like + * allocationId and replication checkpoint. It also has DiscoveryNode which is needed by + * {@link PrimaryShardAllocator} and {@link PrimaryShardBatchAllocator} to make allocation decision. + * This class removes the dependency of + * {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards} to make allocation decisions by + * {@link PrimaryShardAllocator} or {@link PrimaryShardBatchAllocator}. + */ public static class NodeGatewayShardStarted extends GatewayShardStarted { private final DiscoveryNode node; From e798d7aaff720136b3228d47b01590502d1b6b2a Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Fri, 15 Mar 2024 22:57:31 +0530 Subject: [PATCH 09/10] Remove code duplication Signed-off-by: Aman Khare --- .../gateway/RecoveryFromGatewayIT.java | 6 +- .../TransportIndicesShardStoresAction.java | 7 +- .../gateway/PrimaryShardAllocator.java | 8 +- ...ransportNodesListGatewayStartedShards.java | 94 +++++-------------- .../gateway/PrimaryShardAllocatorTests.java | 10 +- .../test/gateway/TestGatewayAllocator.java | 10 +- 6 files changed, 49 insertions(+), 86 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index adc233ab6d047..8404d1f60399e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -721,11 +721,11 @@ public Settings onNodeStopped(String nodeName) throws Exception { ); assertThat(response.getNodes(), hasSize(1)); - assertThat(response.getNodes().get(0).allocationId(), notNullValue()); + assertThat(response.getNodes().get(0).getGatewayShardStarted().allocationId(), notNullValue()); if (corrupt) { - assertThat(response.getNodes().get(0).storeException(), notNullValue()); + assertThat(response.getNodes().get(0).getGatewayShardStarted().storeException(), notNullValue()); } else { - assertThat(response.getNodes().get(0).storeException(), nullValue()); + assertThat(response.getNodes().get(0).getGatewayShardStarted().storeException(), nullValue()); } // start another node so cluster consistency checks won't time out due to the lack of state diff --git a/server/src/main/java/org/opensearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java b/server/src/main/java/org/opensearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java index 04166c88a00ad..3fbf9ac1bb570 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java @@ -258,9 +258,9 @@ void finish() { storeStatuses.add( new IndicesShardStoresResponse.StoreStatus( response.getNode(), - response.allocationId(), + response.getGatewayShardStarted().allocationId(), allocationStatus, - response.storeException() + response.getGatewayShardStarted().storeException() ) ); } @@ -308,7 +308,8 @@ private IndicesShardStoresResponse.StoreStatus.AllocationStatus getAllocationSta * A shard exists/existed in a node only if shard state file exists in the node */ private boolean shardExistsInNode(final NodeGatewayStartedShards response) { - return response.storeException() != null || response.allocationId() != null; + return response.getGatewayShardStarted().storeException() != null + || response.getGatewayShardStarted().allocationId() != null; } @Override diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java index e16e84c95b4b2..aebbd6525e017 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java @@ -142,10 +142,10 @@ private static List adaptToNodeStartedShardList(FetchRe shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { nodeShardStates.add( new NodeGatewayShardStarted( - nodeGatewayStartedShard.allocationId(), - nodeGatewayStartedShard.primary(), - nodeGatewayStartedShard.replicationCheckpoint(), - nodeGatewayStartedShard.storeException(), + nodeGatewayStartedShard.getGatewayShardStarted().allocationId(), + nodeGatewayStartedShard.getGatewayShardStarted().primary(), + nodeGatewayStartedShard.getGatewayShardStarted().replicationCheckpoint(), + nodeGatewayStartedShard.getGatewayShardStarted().storeException(), node ) ); diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java index f81e6bb46bb64..f18ae26c0c8c2 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java @@ -167,10 +167,12 @@ protected NodeGatewayStartedShards nodeOperation(NodeRequest request) { ); return new NodeGatewayStartedShards( clusterService.localNode(), - shardInfo.allocationId(), - shardInfo.primary(), - shardInfo.replicationCheckpoint(), - shardInfo.storeException() + new GatewayShardStarted( + shardInfo.allocationId(), + shardInfo.primary(), + shardInfo.replicationCheckpoint(), + shardInfo.storeException() + ) ); } catch (Exception e) { throw new OpenSearchException("failed to load started shards", e); @@ -303,81 +305,51 @@ public String getCustomDataPath() { * @opensearch.internal */ public static class NodeGatewayStartedShards extends BaseNodeResponse { - private final String allocationId; - private final boolean primary; - private final Exception storeException; - private final ReplicationCheckpoint replicationCheckpoint; + private final GatewayShardStarted gatewayShardStarted; public NodeGatewayStartedShards(StreamInput in) throws IOException { super(in); - allocationId = in.readOptionalString(); - primary = in.readBoolean(); + String allocationId = in.readOptionalString(); + boolean primary = in.readBoolean(); + Exception storeException; if (in.readBoolean()) { storeException = in.readException(); } else { storeException = null; } + ReplicationCheckpoint replicationCheckpoint; if (in.getVersion().onOrAfter(Version.V_2_3_0) && in.readBoolean()) { replicationCheckpoint = new ReplicationCheckpoint(in); } else { replicationCheckpoint = null; } + this.gatewayShardStarted = new GatewayShardStarted(allocationId, primary, replicationCheckpoint, storeException); } - public NodeGatewayStartedShards( - DiscoveryNode node, - String allocationId, - boolean primary, - ReplicationCheckpoint replicationCheckpoint - ) { - this(node, allocationId, primary, replicationCheckpoint, null); + public GatewayShardStarted getGatewayShardStarted() { + return gatewayShardStarted; } - public NodeGatewayStartedShards( - DiscoveryNode node, - String allocationId, - boolean primary, - ReplicationCheckpoint replicationCheckpoint, - Exception storeException - ) { + public NodeGatewayStartedShards(DiscoveryNode node, GatewayShardStarted gatewayShardStarted) { super(node); - this.allocationId = allocationId; - this.primary = primary; - this.replicationCheckpoint = replicationCheckpoint; - this.storeException = storeException; - } - - public String allocationId() { - return this.allocationId; - } - - public boolean primary() { - return this.primary; - } - - public ReplicationCheckpoint replicationCheckpoint() { - return this.replicationCheckpoint; - } - - public Exception storeException() { - return this.storeException; + this.gatewayShardStarted = gatewayShardStarted; } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeOptionalString(allocationId); - out.writeBoolean(primary); - if (storeException != null) { + out.writeOptionalString(gatewayShardStarted.allocationId()); + out.writeBoolean(gatewayShardStarted.primary()); + if (gatewayShardStarted.storeException() != null) { out.writeBoolean(true); - out.writeException(storeException); + out.writeException(gatewayShardStarted.storeException()); } else { out.writeBoolean(false); } if (out.getVersion().onOrAfter(Version.V_2_3_0)) { - if (replicationCheckpoint != null) { + if (gatewayShardStarted.replicationCheckpoint() != null) { out.writeBoolean(true); - replicationCheckpoint.writeTo(out); + gatewayShardStarted.replicationCheckpoint().writeTo(out); } else { out.writeBoolean(false); } @@ -395,33 +367,17 @@ public boolean equals(Object o) { NodeGatewayStartedShards that = (NodeGatewayStartedShards) o; - return primary == that.primary - && Objects.equals(allocationId, that.allocationId) - && Objects.equals(storeException, that.storeException) - && Objects.equals(replicationCheckpoint, that.replicationCheckpoint); + return gatewayShardStarted.equals(that.gatewayShardStarted); } @Override public int hashCode() { - int result = (allocationId != null ? allocationId.hashCode() : 0); - result = 31 * result + (primary ? 1 : 0); - result = 31 * result + (storeException != null ? storeException.hashCode() : 0); - result = 31 * result + (replicationCheckpoint != null ? replicationCheckpoint.hashCode() : 0); - return result; + return gatewayShardStarted.hashCode(); } @Override public String toString() { - StringBuilder buf = new StringBuilder(); - buf.append("NodeGatewayStartedShards[").append("allocationId=").append(allocationId).append(",primary=").append(primary); - if (storeException != null) { - buf.append(",storeException=").append(storeException); - } - if (replicationCheckpoint != null) { - buf.append(",ReplicationCheckpoint=").append(replicationCheckpoint.toString()); - } - buf.append("]"); - return buf.toString(); + return gatewayShardStarted.toString(); } } } diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java index dceda6433575c..cf3eed82fc940 100644 --- a/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java @@ -843,10 +843,12 @@ public TestAllocator addData( node, new TransportNodesListGatewayStartedShards.NodeGatewayStartedShards( node, - allocationId, - primary, - replicationCheckpoint, - storeException + new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( + allocationId, + primary, + replicationCheckpoint, + storeException + ) ) ); return this; diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java index f123b926f5bad..c3897e66479be 100644 --- a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java @@ -42,6 +42,7 @@ import org.opensearch.gateway.GatewayAllocator; import org.opensearch.gateway.PrimaryShardAllocator; import org.opensearch.gateway.ReplicaShardAllocator; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper; import org.opensearch.gateway.TransportNodesListGatewayStartedShards.NodeGatewayStartedShards; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata; @@ -91,9 +92,12 @@ protected AsyncShardFetch.FetchResult fetchData(ShardR routing -> currentNodes.get(routing.currentNodeId()), routing -> new NodeGatewayStartedShards( currentNodes.get(routing.currentNodeId()), - routing.allocationId().getId(), - routing.primary(), - getReplicationCheckpoint(shardId, routing.currentNodeId()) + new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( + routing.allocationId().getId(), + routing.primary(), + getReplicationCheckpoint(shardId, routing.currentNodeId()), + null + ) ) ) ); From d372c54e0f648e3e07c2f9b2ae42c926a5a3e358 Mon Sep 17 00:00:00 2001 From: Aman Khare Date: Mon, 18 Mar 2024 18:24:14 +0530 Subject: [PATCH 10/10] rename class to older style Signed-off-by: Aman Khare --- .../gateway/RecoveryFromGatewayIT.java | 26 ++++---- .../gateway/PrimaryShardAllocator.java | 60 +++++++++---------- .../gateway/PrimaryShardBatchAllocator.java | 12 ++-- ...ansportNodesGatewayStartedShardHelper.java | 24 ++++---- ...ransportNodesListGatewayStartedShards.java | 36 +++++------ ...ortNodesListGatewayStartedShardsBatch.java | 14 ++--- .../gateway/PrimaryShardAllocatorTests.java | 2 +- .../PrimaryShardBatchAllocatorTests.java | 8 +-- .../test/gateway/TestGatewayAllocator.java | 2 +- 9 files changed, 92 insertions(+), 92 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index 8404d1f60399e..ba03532a9aa2f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -56,7 +56,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.env.NodeEnvironment; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.MergePolicyProvider; @@ -765,11 +765,11 @@ public void testSingleShardFetchUsingBatchAction() { ); final Index index = resolveIndex(indexName); final ShardId shardId = new ShardId(index, 0); - GatewayShardStarted gatewayShardStarted = response.getNodesMap() + GatewayStartedShard gatewayStartedShard = response.getNodesMap() .get(searchShardsResponse.getNodes()[0].getId()) .getNodeGatewayStartedShardsBatch() .get(shardId); - assertNodeGatewayStartedShardsHappyCase(gatewayShardStarted); + assertNodeGatewayStartedShardsHappyCase(gatewayStartedShard); } public void testShardFetchMultiNodeMultiIndexesUsingBatchAction() { @@ -793,8 +793,8 @@ public void testShardFetchMultiNodeMultiIndexesUsingBatchAction() { ShardId shardId = clusterSearchShardsGroup.getShardId(); assertEquals(1, clusterSearchShardsGroup.getShards().length); String nodeId = clusterSearchShardsGroup.getShards()[0].currentNodeId(); - GatewayShardStarted gatewayShardStarted = response.getNodesMap().get(nodeId).getNodeGatewayStartedShardsBatch().get(shardId); - assertNodeGatewayStartedShardsHappyCase(gatewayShardStarted); + GatewayStartedShard gatewayStartedShard = response.getNodesMap().get(nodeId).getNodeGatewayStartedShardsBatch().get(shardId); + assertNodeGatewayStartedShardsHappyCase(gatewayStartedShard); } } @@ -814,13 +814,13 @@ public void testShardFetchCorruptedShardsUsingBatchAction() throws Exception { new TransportNodesListGatewayStartedShardsBatch.Request(getDiscoveryNodes(), shardIdShardAttributesMap) ); DiscoveryNode[] discoveryNodes = getDiscoveryNodes(); - GatewayShardStarted gatewayShardStarted = response.getNodesMap() + GatewayStartedShard gatewayStartedShard = response.getNodesMap() .get(discoveryNodes[0].getId()) .getNodeGatewayStartedShardsBatch() .get(shardId); - assertNotNull(gatewayShardStarted.storeException()); - assertNotNull(gatewayShardStarted.allocationId()); - assertTrue(gatewayShardStarted.primary()); + assertNotNull(gatewayStartedShard.storeException()); + assertNotNull(gatewayStartedShard.allocationId()); + assertTrue(gatewayStartedShard.primary()); } public void testSingleShardStoreFetchUsingBatchAction() throws ExecutionException, InterruptedException { @@ -948,10 +948,10 @@ private void assertNodeStoreFilesMetadataSuccessCase( assertNotNull(storeFileMetadata.peerRecoveryRetentionLeases()); } - private void assertNodeGatewayStartedShardsHappyCase(GatewayShardStarted gatewayShardStarted) { - assertNull(gatewayShardStarted.storeException()); - assertNotNull(gatewayShardStarted.allocationId()); - assertTrue(gatewayShardStarted.primary()); + private void assertNodeGatewayStartedShardsHappyCase(GatewayStartedShard gatewayStartedShard) { + assertNull(gatewayStartedShard.storeException()); + assertNotNull(gatewayStartedShard.allocationId()); + assertTrue(gatewayStartedShard.primary()); } private void prepareIndex(String indexName, int numberOfPrimaryShards) { diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java index aebbd6525e017..f41545cbdf9bf 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardAllocator.java @@ -50,7 +50,7 @@ import org.opensearch.cluster.routing.allocation.decider.Decision.Type; import org.opensearch.env.ShardLockObtainFailedException; import org.opensearch.gateway.AsyncShardFetch.FetchResult; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayShardStarted; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayStartedShard; import org.opensearch.gateway.TransportNodesListGatewayStartedShards.NodeGatewayStartedShards; import java.util.ArrayList; @@ -126,22 +126,22 @@ public AllocateUnassignedDecision makeAllocationDecision( return decision; } final FetchResult shardState = fetchData(unassignedShard, allocation); - List nodeShardStates = adaptToNodeStartedShardList(shardState); + List nodeShardStates = adaptToNodeStartedShardList(shardState); return getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger); } /** - * Transforms {@link FetchResult} of {@link NodeGatewayStartedShards} to {@link List} of {@link NodeGatewayShardStarted} + * Transforms {@link FetchResult} of {@link NodeGatewayStartedShards} to {@link List} of {@link NodeGatewayStartedShard} * Returns null if {@link FetchResult} does not have any data. */ - private static List adaptToNodeStartedShardList(FetchResult shardsState) { + private static List adaptToNodeStartedShardList(FetchResult shardsState) { if (!shardsState.hasData()) { return null; } - List nodeShardStates = new ArrayList<>(); + List nodeShardStates = new ArrayList<>(); shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { nodeShardStates.add( - new NodeGatewayShardStarted( + new NodeGatewayStartedShard( nodeGatewayStartedShard.getGatewayShardStarted().allocationId(), nodeGatewayStartedShard.getGatewayShardStarted().primary(), nodeGatewayStartedShard.getGatewayShardStarted().replicationCheckpoint(), @@ -156,7 +156,7 @@ private static List adaptToNodeStartedShardList(FetchRe protected AllocateUnassignedDecision getAllocationDecision( ShardRouting unassignedShard, RoutingAllocation allocation, - List shardState, + List shardState, Logger logger ) { final boolean explain = allocation.debugDecision(); @@ -247,7 +247,7 @@ protected AllocateUnassignedDecision getAllocationDecision( nodesToAllocate = buildNodesToAllocate(allocation, nodeShardsResult.orderedAllocationCandidates, unassignedShard, true); if (nodesToAllocate.yesNodeShards.isEmpty() == false) { final DecidedNode decidedNode = nodesToAllocate.yesNodeShards.get(0); - final NodeGatewayShardStarted nodeShardState = decidedNode.nodeShardState; + final NodeGatewayStartedShard nodeShardState = decidedNode.nodeShardState; logger.debug( "[{}][{}]: allocating [{}] to [{}] on forced primary allocation", unassignedShard.index(), @@ -307,11 +307,11 @@ protected AllocateUnassignedDecision getAllocationDecision( */ private static List buildNodeDecisions( NodesToAllocate nodesToAllocate, - List fetchedShardData, + List fetchedShardData, Set inSyncAllocationIds ) { List nodeResults = new ArrayList<>(); - Collection ineligibleShards = new ArrayList<>(); + Collection ineligibleShards = new ArrayList<>(); if (nodesToAllocate != null) { final Set discoNodes = new HashSet<>(); nodeResults.addAll( @@ -345,21 +345,21 @@ private static List buildNodeDecisions( return nodeResults; } - private static ShardStoreInfo shardStoreInfo(NodeGatewayShardStarted nodeShardState, Set inSyncAllocationIds) { + private static ShardStoreInfo shardStoreInfo(NodeGatewayStartedShard nodeShardState, Set inSyncAllocationIds) { final Exception storeErr = nodeShardState.storeException(); final boolean inSync = nodeShardState.allocationId() != null && inSyncAllocationIds.contains(nodeShardState.allocationId()); return new ShardStoreInfo(nodeShardState.allocationId(), inSync, storeErr); } - private static final Comparator NO_STORE_EXCEPTION_FIRST_COMPARATOR = Comparator.comparing( - (NodeGatewayShardStarted state) -> state.storeException() == null + private static final Comparator NO_STORE_EXCEPTION_FIRST_COMPARATOR = Comparator.comparing( + (NodeGatewayStartedShard state) -> state.storeException() == null ).reversed(); - private static final Comparator PRIMARY_FIRST_COMPARATOR = Comparator.comparing( - NodeGatewayShardStarted::primary + private static final Comparator PRIMARY_FIRST_COMPARATOR = Comparator.comparing( + NodeGatewayStartedShard::primary ).reversed(); - private static final Comparator HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR = Comparator.comparing( - NodeGatewayShardStarted::replicationCheckpoint, + private static final Comparator HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR = Comparator.comparing( + NodeGatewayStartedShard::replicationCheckpoint, Comparator.nullsLast(Comparator.naturalOrder()) ); @@ -373,12 +373,12 @@ protected static NodeShardsResult buildNodeShardsResult( boolean matchAnyShard, Set ignoreNodes, Set inSyncAllocationIds, - List shardState, + List shardState, Logger logger ) { - List nodeShardStates = new ArrayList<>(); + List nodeShardStates = new ArrayList<>(); int numberOfAllocationsFound = 0; - for (NodeGatewayShardStarted nodeShardState : shardState) { + for (NodeGatewayStartedShard nodeShardState : shardState) { DiscoveryNode node = nodeShardState.getNode(); String allocationId = nodeShardState.allocationId(); @@ -443,18 +443,18 @@ protected static NodeShardsResult buildNodeShardsResult( return new NodeShardsResult(nodeShardStates, numberOfAllocationsFound); } - private static Comparator createActiveShardComparator(boolean matchAnyShard, Set inSyncAllocationIds) { + private static Comparator createActiveShardComparator(boolean matchAnyShard, Set inSyncAllocationIds) { /** * Orders the active shards copies based on below comparators * 1. No store exception i.e. shard copy is readable * 2. Prefer previous primary shard * 3. Prefer shard copy with the highest replication checkpoint. It is NO-OP for doc rep enabled indices. */ - final Comparator comparator; // allocation preference + final Comparator comparator; // allocation preference if (matchAnyShard) { // prefer shards with matching allocation ids - Comparator matchingAllocationsFirst = Comparator.comparing( - (NodeGatewayShardStarted state) -> inSyncAllocationIds.contains(state.allocationId()) + Comparator matchingAllocationsFirst = Comparator.comparing( + (NodeGatewayStartedShard state) -> inSyncAllocationIds.contains(state.allocationId()) ).reversed(); comparator = matchingAllocationsFirst.thenComparing(NO_STORE_EXCEPTION_FIRST_COMPARATOR) .thenComparing(PRIMARY_FIRST_COMPARATOR) @@ -472,14 +472,14 @@ private static Comparator createActiveShardComparator(b */ private static NodesToAllocate buildNodesToAllocate( RoutingAllocation allocation, - List nodeShardStates, + List nodeShardStates, ShardRouting shardRouting, boolean forceAllocate ) { List yesNodeShards = new ArrayList<>(); List throttledNodeShards = new ArrayList<>(); List noNodeShards = new ArrayList<>(); - for (NodeGatewayShardStarted nodeShardState : nodeShardStates) { + for (NodeGatewayStartedShard nodeShardState : nodeShardStates) { RoutingNode node = allocation.routingNodes().node(nodeShardState.getNode().getId()); if (node == null) { continue; @@ -510,10 +510,10 @@ private static NodesToAllocate buildNodesToAllocate( * This class encapsulates the result of a call to {@link #buildNodeShardsResult} */ static class NodeShardsResult { - final List orderedAllocationCandidates; + final List orderedAllocationCandidates; final int allocationsFound; - NodeShardsResult(List orderedAllocationCandidates, int allocationsFound) { + NodeShardsResult(List orderedAllocationCandidates, int allocationsFound) { this.orderedAllocationCandidates = orderedAllocationCandidates; this.allocationsFound = allocationsFound; } @@ -539,10 +539,10 @@ protected static class NodesToAllocate { * by the allocator for allocating to the node that holds the shard copy. */ private static class DecidedNode { - final NodeGatewayShardStarted nodeShardState; + final NodeGatewayStartedShard nodeShardState; final Decision decision; - private DecidedNode(NodeGatewayShardStarted nodeShardState, Decision decision) { + private DecidedNode(NodeGatewayStartedShard nodeShardState, Decision decision) { this.nodeShardState = nodeShardState; this.decision = decision; } diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 08ce4ab9b3f39..8d222903b6f29 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -15,7 +15,7 @@ import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.gateway.AsyncShardFetch.FetchResult; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayShardStarted; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayStartedShard; import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; import java.util.ArrayList; @@ -99,7 +99,7 @@ public HashMap makeAllocationDecision( // process the received data for (ShardRouting unassignedShard : eligibleShards) { - List nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); + List nodeShardStates = adaptToNodeShardStates(unassignedShard, shardsState); // get allocation decision for this shard shardAllocationDecisions.put(unassignedShard, getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger)); } @@ -120,23 +120,23 @@ public HashMap makeAllocationDecision( * @param shardsState fetch data result for the whole batch * @return shard state returned from each node */ - private static List adaptToNodeShardStates( + private static List adaptToNodeShardStates( ShardRouting unassignedShard, FetchResult shardsState ) { if (!shardsState.hasData()) { return null; } - List nodeShardStates = new ArrayList<>(); + List nodeShardStates = new ArrayList<>(); Map nodeResponses = shardsState.getData(); // build data for a shard from all the nodes nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { - TransportNodesGatewayStartedShardHelper.GatewayShardStarted shardData = nodeGatewayStartedShardsBatch + TransportNodesGatewayStartedShardHelper.GatewayStartedShard shardData = nodeGatewayStartedShardsBatch .getNodeGatewayStartedShardsBatch() .get(unassignedShard.shardId()); nodeShardStates.add( - new NodeGatewayShardStarted( + new NodeGatewayStartedShard( shardData.allocationId(), shardData.primary(), shardData.replicationCheckpoint(), diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java index d08291de2eac5..27cce76b1b694 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java @@ -42,7 +42,7 @@ * @opensearch.internal */ public class TransportNodesGatewayStartedShardHelper { - public static GatewayShardStarted getShardInfoOnLocalNode( + public static GatewayStartedShard getShardInfoOnLocalNode( Logger logger, final ShardId shardId, NamedXContentRegistry namedXContentRegistry, @@ -95,21 +95,21 @@ public static GatewayShardStarted getShardInfoOnLocalNode( exception ); String allocationId = shardStateMetadata.allocationId != null ? shardStateMetadata.allocationId.getId() : null; - return new GatewayShardStarted(allocationId, shardStateMetadata.primary, null, exception); + return new GatewayStartedShard(allocationId, shardStateMetadata.primary, null, exception); } } logger.debug("{} shard state info found: [{}]", shardId, shardStateMetadata); String allocationId = shardStateMetadata.allocationId != null ? shardStateMetadata.allocationId.getId() : null; final IndexShard shard = indicesService.getShardOrNull(shardId); - return new GatewayShardStarted( + return new GatewayStartedShard( allocationId, shardStateMetadata.primary, shard != null ? shard.getLatestReplicationCheckpoint() : null ); } logger.trace("{} no local shard info found", shardId); - return new GatewayShardStarted(null, false, null); + return new GatewayStartedShard(null, false, null); } /** @@ -121,13 +121,13 @@ public static GatewayShardStarted getShardInfoOnLocalNode( * * @opensearch.internal */ - public static class GatewayShardStarted { + public static class GatewayStartedShard { private final String allocationId; private final boolean primary; private final Exception storeException; private final ReplicationCheckpoint replicationCheckpoint; - public GatewayShardStarted(StreamInput in) throws IOException { + public GatewayStartedShard(StreamInput in) throws IOException { allocationId = in.readOptionalString(); primary = in.readBoolean(); if (in.readBoolean()) { @@ -142,11 +142,11 @@ public GatewayShardStarted(StreamInput in) throws IOException { } } - public GatewayShardStarted(String allocationId, boolean primary, ReplicationCheckpoint replicationCheckpoint) { + public GatewayStartedShard(String allocationId, boolean primary, ReplicationCheckpoint replicationCheckpoint) { this(allocationId, primary, replicationCheckpoint, null); } - public GatewayShardStarted( + public GatewayStartedShard( String allocationId, boolean primary, ReplicationCheckpoint replicationCheckpoint, @@ -200,7 +200,7 @@ public boolean equals(Object o) { return false; } - GatewayShardStarted that = (GatewayShardStarted) o; + GatewayStartedShard that = (GatewayStartedShard) o; return primary == that.primary && Objects.equals(allocationId, that.allocationId) @@ -233,18 +233,18 @@ public String toString() { } /** - * This class extends the {@link GatewayShardStarted} which contains all necessary shard metadata like + * This class extends the {@link GatewayStartedShard} which contains all necessary shard metadata like * allocationId and replication checkpoint. It also has DiscoveryNode which is needed by * {@link PrimaryShardAllocator} and {@link PrimaryShardBatchAllocator} to make allocation decision. * This class removes the dependency of * {@link TransportNodesListGatewayStartedShards.NodeGatewayStartedShards} to make allocation decisions by * {@link PrimaryShardAllocator} or {@link PrimaryShardBatchAllocator}. */ - public static class NodeGatewayShardStarted extends GatewayShardStarted { + public static class NodeGatewayStartedShard extends GatewayStartedShard { private final DiscoveryNode node; - public NodeGatewayShardStarted( + public NodeGatewayStartedShard( String allocationId, boolean primary, ReplicationCheckpoint replicationCheckpoint, diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java index f18ae26c0c8c2..4b1f611bb88ab 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java @@ -53,7 +53,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; import org.opensearch.indices.IndicesService; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.store.ShardAttributes; @@ -155,7 +155,7 @@ protected NodesGatewayStartedShards newResponse( @Override protected NodeGatewayStartedShards nodeOperation(NodeRequest request) { try { - GatewayShardStarted shardInfo = getShardInfoOnLocalNode( + GatewayStartedShard shardInfo = getShardInfoOnLocalNode( logger, request.getShardId(), namedXContentRegistry, @@ -167,7 +167,7 @@ protected NodeGatewayStartedShards nodeOperation(NodeRequest request) { ); return new NodeGatewayStartedShards( clusterService.localNode(), - new GatewayShardStarted( + new GatewayStartedShard( shardInfo.allocationId(), shardInfo.primary(), shardInfo.replicationCheckpoint(), @@ -305,7 +305,7 @@ public String getCustomDataPath() { * @opensearch.internal */ public static class NodeGatewayStartedShards extends BaseNodeResponse { - private final GatewayShardStarted gatewayShardStarted; + private final GatewayStartedShard gatewayStartedShard; public NodeGatewayStartedShards(StreamInput in) throws IOException { super(in); @@ -323,33 +323,33 @@ public NodeGatewayStartedShards(StreamInput in) throws IOException { } else { replicationCheckpoint = null; } - this.gatewayShardStarted = new GatewayShardStarted(allocationId, primary, replicationCheckpoint, storeException); + this.gatewayStartedShard = new GatewayStartedShard(allocationId, primary, replicationCheckpoint, storeException); } - public GatewayShardStarted getGatewayShardStarted() { - return gatewayShardStarted; + public GatewayStartedShard getGatewayShardStarted() { + return gatewayStartedShard; } - public NodeGatewayStartedShards(DiscoveryNode node, GatewayShardStarted gatewayShardStarted) { + public NodeGatewayStartedShards(DiscoveryNode node, GatewayStartedShard gatewayStartedShard) { super(node); - this.gatewayShardStarted = gatewayShardStarted; + this.gatewayStartedShard = gatewayStartedShard; } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeOptionalString(gatewayShardStarted.allocationId()); - out.writeBoolean(gatewayShardStarted.primary()); - if (gatewayShardStarted.storeException() != null) { + out.writeOptionalString(gatewayStartedShard.allocationId()); + out.writeBoolean(gatewayStartedShard.primary()); + if (gatewayStartedShard.storeException() != null) { out.writeBoolean(true); - out.writeException(gatewayShardStarted.storeException()); + out.writeException(gatewayStartedShard.storeException()); } else { out.writeBoolean(false); } if (out.getVersion().onOrAfter(Version.V_2_3_0)) { - if (gatewayShardStarted.replicationCheckpoint() != null) { + if (gatewayStartedShard.replicationCheckpoint() != null) { out.writeBoolean(true); - gatewayShardStarted.replicationCheckpoint().writeTo(out); + gatewayStartedShard.replicationCheckpoint().writeTo(out); } else { out.writeBoolean(false); } @@ -367,17 +367,17 @@ public boolean equals(Object o) { NodeGatewayStartedShards that = (NodeGatewayStartedShards) o; - return gatewayShardStarted.equals(that.gatewayShardStarted); + return gatewayStartedShard.equals(that.gatewayStartedShard); } @Override public int hashCode() { - return gatewayShardStarted.hashCode(); + return gatewayStartedShard.hashCode(); } @Override public String toString() { - return gatewayShardStarted.toString(); + return gatewayStartedShard.toString(); } } } diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java index d8e83b955d46d..dc5d85b17bc32 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java @@ -27,7 +27,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayShardStarted; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; import org.opensearch.indices.IndicesService; import org.opensearch.indices.store.ShardAttributes; import org.opensearch.threadpool.ThreadPool; @@ -135,7 +135,7 @@ protected NodesGatewayStartedShardsBatch newResponse( */ @Override protected NodeGatewayStartedShardsBatch nodeOperation(NodeRequest request) { - Map shardsOnNode = new HashMap<>(); + Map shardsOnNode = new HashMap<>(); for (ShardAttributes shardAttr : request.shardAttributes.values()) { final ShardId shardId = shardAttr.getShardId(); try { @@ -155,7 +155,7 @@ protected NodeGatewayStartedShardsBatch nodeOperation(NodeRequest request) { } catch (Exception e) { shardsOnNode.put( shardId, - new GatewayShardStarted(null, false, null, new OpenSearchException("failed to load started shards", e)) + new GatewayStartedShard(null, false, null, new OpenSearchException("failed to load started shards", e)) ); } } @@ -256,15 +256,15 @@ public void writeTo(StreamOutput out) throws IOException { * @opensearch.internal */ public static class NodeGatewayStartedShardsBatch extends BaseNodeResponse { - private final Map nodeGatewayStartedShardsBatch; + private final Map nodeGatewayStartedShardsBatch; - public Map getNodeGatewayStartedShardsBatch() { + public Map getNodeGatewayStartedShardsBatch() { return nodeGatewayStartedShardsBatch; } public NodeGatewayStartedShardsBatch(StreamInput in) throws IOException { super(in); - this.nodeGatewayStartedShardsBatch = in.readMap(ShardId::new, GatewayShardStarted::new); + this.nodeGatewayStartedShardsBatch = in.readMap(ShardId::new, GatewayStartedShard::new); } @Override @@ -273,7 +273,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeMap(nodeGatewayStartedShardsBatch, (o, k) -> k.writeTo(o), (o, v) -> v.writeTo(o)); } - public NodeGatewayStartedShardsBatch(DiscoveryNode node, Map nodeGatewayStartedShardsBatch) { + public NodeGatewayStartedShardsBatch(DiscoveryNode node, Map nodeGatewayStartedShardsBatch) { super(node); this.nodeGatewayStartedShardsBatch = nodeGatewayStartedShardsBatch; } diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java index cf3eed82fc940..e849f12143b4d 100644 --- a/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardAllocatorTests.java @@ -843,7 +843,7 @@ public TestAllocator addData( node, new TransportNodesListGatewayStartedShards.NodeGatewayStartedShards( node, - new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( + new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( allocationId, primary, replicationCheckpoint, diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java index 0175961f6a274..4796def2b8902 100644 --- a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java @@ -289,9 +289,9 @@ public TestBatchAllocator addData( if (data == null) { data = new HashMap<>(); } - Map shardData = Map.of( + Map shardData = Map.of( shardId, - new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( + new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( allocationId, primary, replicationCheckpoint, @@ -313,10 +313,10 @@ public TestBatchAllocator addShardData( if (data == null) { data = new HashMap<>(); } - Map shardData = new HashMap<>(); + Map shardData = new HashMap<>(); shardData.put( shardId, - new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( + new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( allocationId, primary, replicationCheckpoint, diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java index c3897e66479be..b1695ff00e0cc 100644 --- a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java @@ -92,7 +92,7 @@ protected AsyncShardFetch.FetchResult fetchData(ShardR routing -> currentNodes.get(routing.currentNodeId()), routing -> new NodeGatewayStartedShards( currentNodes.get(routing.currentNodeId()), - new TransportNodesGatewayStartedShardHelper.GatewayShardStarted( + new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( routing.allocationId().getId(), routing.primary(), getReplicationCheckpoint(shardId, routing.currentNodeId()),