From 7103e56a00ff4a17fb8e4e2087a8e69d916ed40c Mon Sep 17 00:00:00 2001 From: Aman Khare <85096200+amkhar@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:26:51 +0530 Subject: [PATCH 1/6] Add ShardBatchCache to support caching for TransportNodesListGatewayStartedShardsBatch (#12504) Signed-off-by: Aman Khare --- .../gateway/GatewayRecoveryTestUtils.java | 2 +- .../gateway/AsyncShardBatchFetch.java | 243 ++++++++++++++++++ .../opensearch/gateway/AsyncShardFetch.java | 11 +- .../gateway/AsyncShardFetchCache.java | 1 + .../gateway/PrimaryShardBatchAllocator.java | 5 +- .../gateway/ShardBatchResponseFactory.java | 51 ++++ ...ansportNodesGatewayStartedShardHelper.java | 11 +- ...ortNodesListGatewayStartedShardsBatch.java | 40 ++- .../indices/store/ShardAttributes.java | 21 +- ...sportNodesListShardStoreMetadataBatch.java | 7 +- .../gateway/AsyncShardFetchTests.java | 18 +- .../org/opensearch/gateway/BatchTestUtil.java | 25 ++ .../PrimaryShardBatchAllocatorTests.java | 22 +- .../gateway/ShardBatchCacheTests.java | 231 +++++++++++++++++ .../indices/store/ShardAttributesTests.java | 6 +- 15 files changed, 621 insertions(+), 73 deletions(-) create mode 100644 server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java create mode 100644 server/src/main/java/org/opensearch/gateway/ShardBatchResponseFactory.java create mode 100644 server/src/test/java/org/opensearch/gateway/BatchTestUtil.java create mode 100644 server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/GatewayRecoveryTestUtils.java b/server/src/internalClusterTest/java/org/opensearch/gateway/GatewayRecoveryTestUtils.java index 2b6a5b4ee6867..dc157681be6fa 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/GatewayRecoveryTestUtils.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/GatewayRecoveryTestUtils.java @@ -54,7 +54,7 @@ public static Map prepareRequestMap(String[] indices, ); for (int shardIdNum = 0; shardIdNum < primaryShardCount; shardIdNum++) { final ShardId shardId = new ShardId(index, shardIdNum); - shardIdShardAttributesMap.put(shardId, new ShardAttributes(shardId, customDataPath)); + shardIdShardAttributesMap.put(shardId, new ShardAttributes(customDataPath)); } } return shardIdShardAttributesMap; diff --git a/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java b/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java new file mode 100644 index 0000000000000..4f39a39cea678 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java @@ -0,0 +1,243 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway; + +import org.apache.logging.log4j.Logger; +import org.opensearch.action.support.nodes.BaseNodeResponse; +import org.opensearch.action.support.nodes.BaseNodesResponse; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.common.logging.Loggers; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.indices.store.ShardAttributes; + +import java.lang.reflect.Array; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; + +import reactor.util.annotation.NonNull; + +/** + * Implementation of AsyncShardFetch with batching support. This class is responsible for executing the fetch + * part using the base class {@link AsyncShardFetch}. Other functionalities needed for a batch are only written here. + * This separation also takes care of the extra generic type V which is only needed for batch + * transport actions like {@link TransportNodesListGatewayStartedShardsBatch} and + * {@link org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch}. + * + * @param Response type of the transport action. + * @param Data type of shard level response. + * + * @opensearch.internal + */ +public abstract class AsyncShardBatchFetch extends AsyncShardFetch { + + @SuppressWarnings("unchecked") + AsyncShardBatchFetch( + Logger logger, + String type, + Map shardAttributesMap, + AsyncShardFetch.Lister, T> action, + String batchId, + Class clazz, + V emptyShardResponse, + Predicate emptyShardResponsePredicate, + ShardBatchResponseFactory responseFactory + ) { + super( + logger, + type, + shardAttributesMap, + action, + batchId, + new ShardBatchCache<>( + logger, + type, + shardAttributesMap, + "BatchID=[" + batchId + "]", + clazz, + emptyShardResponse, + emptyShardResponsePredicate, + responseFactory + ) + ); + } + + /** + * Remove a shard from the cache maintaining a full batch of shards. This is needed to clear the shard once it's + * assigned or failed. + * + * @param shardId shardId to be removed from the batch. + */ + public synchronized void clearShard(ShardId shardId) { + this.shardAttributesMap.remove(shardId); + this.cache.deleteShard(shardId); + } + + /** + * Cache implementation of transport actions returning batch of shards related data in the response. + * Store node level responses of transport actions like {@link TransportNodesListGatewayStartedShardsBatch} or + * {@link org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch} with memory efficient caching + * approach. This cache class is not thread safe, all of its methods are being called from + * {@link AsyncShardFetch} class which has synchronized blocks present to handle multiple threads. + * + * @param Response type of transport action. + * @param Data type of shard level response. + */ + static class ShardBatchCache extends AsyncShardFetchCache { + private final Map> cache; + private final Map shardIdToArray; + private final int batchSize; + private final Class shardResponseClass; + private final ShardBatchResponseFactory responseFactory; + private final V emptyResponse; + private final Predicate emptyShardResponsePredicate; + private final Logger logger; + + public ShardBatchCache( + Logger logger, + String type, + Map shardAttributesMap, + String logKey, + Class clazz, + V emptyResponse, + Predicate emptyShardResponsePredicate, + ShardBatchResponseFactory responseFactory + ) { + super(Loggers.getLogger(logger, "_" + logKey), type); + this.batchSize = shardAttributesMap.size(); + this.emptyShardResponsePredicate = emptyShardResponsePredicate; + cache = new HashMap<>(); + shardIdToArray = new HashMap<>(); + fillShardIdKeys(shardAttributesMap.keySet()); + this.shardResponseClass = clazz; + this.emptyResponse = emptyResponse; + this.logger = logger; + this.responseFactory = responseFactory; + } + + @Override + @NonNull + public Map getCache() { + return cache; + } + + @Override + public void deleteShard(ShardId shardId) { + if (shardIdToArray.containsKey(shardId)) { + Integer shardIdIndex = shardIdToArray.remove(shardId); + for (String nodeId : cache.keySet()) { + cache.get(nodeId).clearShard(shardIdIndex); + } + } + } + + @Override + public void initData(DiscoveryNode node) { + cache.put(node.getId(), new NodeEntry<>(node.getId(), shardResponseClass, batchSize, emptyShardResponsePredicate)); + } + + /** + * Put the response received from data nodes into the cache. + * Get shard level data from batch, then filter out if any shards received failures. + * After that complete storing the data at node level and mark fetching as done. + * + * @param node node from which we got the response. + * @param response shard metadata coming from node. + */ + @Override + public void putData(DiscoveryNode node, T response) { + NodeEntry nodeEntry = cache.get(node.getId()); + Map batchResponse = responseFactory.getShardBatchData(response); + nodeEntry.doneFetching(batchResponse, shardIdToArray); + } + + @Override + public T getData(DiscoveryNode node) { + return this.responseFactory.getNewResponse(node, getBatchData(cache.get(node.getId()))); + } + + private HashMap getBatchData(NodeEntry nodeEntry) { + V[] nodeShardEntries = nodeEntry.getData(); + boolean[] emptyResponses = nodeEntry.getEmptyShardResponse(); + HashMap shardData = new HashMap<>(); + for (Map.Entry shardIdEntry : shardIdToArray.entrySet()) { + ShardId shardId = shardIdEntry.getKey(); + Integer arrIndex = shardIdEntry.getValue(); + if (emptyResponses[arrIndex]) { + shardData.put(shardId, emptyResponse); + } else if (nodeShardEntries[arrIndex] != null) { + // ignore null responses here + shardData.put(shardId, nodeShardEntries[arrIndex]); + } + } + return shardData; + } + + private void fillShardIdKeys(Set shardIds) { + int shardIdIndex = 0; + for (ShardId shardId : shardIds) { + this.shardIdToArray.putIfAbsent(shardId, shardIdIndex++); + } + } + + /** + * A node entry, holding the state of the fetched data for a specific shard + * for a giving node. + */ + static class NodeEntry extends BaseNodeEntry { + private final V[] shardData; + private final boolean[] emptyShardResponse; // we can not rely on null entries of the shardData array, + // those null entries means that we need to ignore those entries. Empty responses on the other hand are + // actually needed in allocation/explain API response. So instead of storing full empty response object + // in cache, it's better to just store a boolean and create that object on the fly just before + // decision-making. + private final Predicate emptyShardResponsePredicate; + + NodeEntry(String nodeId, Class clazz, int batchSize, Predicate emptyShardResponsePredicate) { + super(nodeId); + this.shardData = (V[]) Array.newInstance(clazz, batchSize); + this.emptyShardResponse = new boolean[batchSize]; + this.emptyShardResponsePredicate = emptyShardResponsePredicate; + } + + void doneFetching(Map shardDataFromNode, Map shardIdKey) { + fillShardData(shardDataFromNode, shardIdKey); + super.doneFetching(); + } + + void clearShard(Integer shardIdIndex) { + this.shardData[shardIdIndex] = null; + emptyShardResponse[shardIdIndex] = false; + } + + V[] getData() { + return this.shardData; + } + + boolean[] getEmptyShardResponse() { + return emptyShardResponse; + } + + private void fillShardData(Map shardDataFromNode, Map shardIdKey) { + for (Map.Entry shardData : shardDataFromNode.entrySet()) { + if (shardData.getValue() != null) { + ShardId shardId = shardData.getKey(); + if (emptyShardResponsePredicate.test(shardData.getValue())) { + this.emptyShardResponse[shardIdKey.get(shardId)] = true; + this.shardData[shardIdKey.get(shardId)] = null; + } else { + this.shardData[shardIdKey.get(shardId)] = shardData.getValue(); + } + } + } + } + } + } +} diff --git a/server/src/main/java/org/opensearch/gateway/AsyncShardFetch.java b/server/src/main/java/org/opensearch/gateway/AsyncShardFetch.java index 3d129d4794a10..b664dd573ce67 100644 --- a/server/src/main/java/org/opensearch/gateway/AsyncShardFetch.java +++ b/server/src/main/java/org/opensearch/gateway/AsyncShardFetch.java @@ -82,10 +82,10 @@ public interface Lister, N protected final String type; protected final Map shardAttributesMap; private final Lister, T> action; - private final AsyncShardFetchCache cache; + protected final AsyncShardFetchCache cache; private final AtomicLong round = new AtomicLong(); private boolean closed; - private final String reroutingKey; + final String reroutingKey; private final Map> shardToIgnoreNodes = new HashMap<>(); @SuppressWarnings("unchecked") @@ -99,7 +99,7 @@ protected AsyncShardFetch( this.logger = logger; this.type = type; shardAttributesMap = new HashMap<>(); - shardAttributesMap.put(shardId, new ShardAttributes(shardId, customDataPath)); + shardAttributesMap.put(shardId, new ShardAttributes(customDataPath)); this.action = (Lister, T>) action; this.reroutingKey = "ShardId=[" + shardId.toString() + "]"; cache = new ShardCache<>(logger, reroutingKey, type); @@ -120,14 +120,15 @@ protected AsyncShardFetch( String type, Map shardAttributesMap, Lister, T> action, - String batchId + String batchId, + AsyncShardFetchCache cache ) { this.logger = logger; this.type = type; this.shardAttributesMap = shardAttributesMap; this.action = (Lister, T>) action; this.reroutingKey = "BatchID=[" + batchId + "]"; - cache = new ShardCache<>(logger, reroutingKey, type); + this.cache = cache; } @Override diff --git a/server/src/main/java/org/opensearch/gateway/AsyncShardFetchCache.java b/server/src/main/java/org/opensearch/gateway/AsyncShardFetchCache.java index 3140ceef4f3ee..2a4e6181467b0 100644 --- a/server/src/main/java/org/opensearch/gateway/AsyncShardFetchCache.java +++ b/server/src/main/java/org/opensearch/gateway/AsyncShardFetchCache.java @@ -48,6 +48,7 @@ * @opensearch.internal */ public abstract class AsyncShardFetchCache { + private final Logger logger; private final String type; diff --git a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java index 8d222903b6f29..1979f33484d49 100644 --- a/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/PrimaryShardBatchAllocator.java @@ -15,6 +15,7 @@ import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.gateway.AsyncShardFetch.FetchResult; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.NodeGatewayStartedShard; import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; @@ -132,9 +133,7 @@ private static List adaptToNodeShardStates( // build data for a shard from all the nodes nodeResponses.forEach((node, nodeGatewayStartedShardsBatch) -> { - TransportNodesGatewayStartedShardHelper.GatewayStartedShard shardData = nodeGatewayStartedShardsBatch - .getNodeGatewayStartedShardsBatch() - .get(unassignedShard.shardId()); + GatewayStartedShard shardData = nodeGatewayStartedShardsBatch.getNodeGatewayStartedShardsBatch().get(unassignedShard.shardId()); nodeShardStates.add( new NodeGatewayStartedShard( shardData.allocationId(), diff --git a/server/src/main/java/org/opensearch/gateway/ShardBatchResponseFactory.java b/server/src/main/java/org/opensearch/gateway/ShardBatchResponseFactory.java new file mode 100644 index 0000000000000..4b85ef995f1e1 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/ShardBatchResponseFactory.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway; + +import org.opensearch.action.support.nodes.BaseNodeResponse; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; +import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata; +import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch; + +import java.util.Map; + +/** + * A factory class to create new responses of batch transport actions like + * {@link TransportNodesListGatewayStartedShardsBatch} or {@link org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch} + * + * @param Node level response returned by batch transport actions. + * @param Shard level metadata returned by batch transport actions. + */ +public class ShardBatchResponseFactory { + private final boolean primary; + + public ShardBatchResponseFactory(boolean primary) { + this.primary = primary; + } + + public T getNewResponse(DiscoveryNode node, Map shardData) { + if (primary) { + return (T) new NodeGatewayStartedShardsBatch(node, (Map) shardData); + } else { + return (T) new NodeStoreFilesMetadataBatch(node, (Map) shardData); + } + } + + public Map getShardBatchData(T response) { + if (primary) { + return (Map) ((NodeGatewayStartedShardsBatch) response).getNodeGatewayStartedShardsBatch(); + } else { + return (Map) ((NodeStoreFilesMetadataBatch) response).getNodeStoreFilesMetadataBatch(); + } + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java index 27cce76b1b694..2ddae1d8410c9 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesGatewayStartedShardHelper.java @@ -42,6 +42,8 @@ * @opensearch.internal */ public class TransportNodesGatewayStartedShardHelper { + public static final String INDEX_NOT_FOUND = "node doesn't have meta data for index"; + public static GatewayStartedShard getShardInfoOnLocalNode( Logger logger, final ShardId shardId, @@ -72,7 +74,7 @@ public static GatewayStartedShard getShardInfoOnLocalNode( customDataPath = new IndexSettings(metadata, settings).customDataPath(); } else { logger.trace("{} node doesn't have meta data for the requests index", shardId); - throw new OpenSearchException("node doesn't have meta data for index " + shardId.getIndex()); + throw new OpenSearchException(INDEX_NOT_FOUND + " " + shardId.getIndex()); } } // we don't have an open shard on the store, validate the files on disk are openable @@ -230,6 +232,13 @@ public String toString() { buf.append("]"); return buf.toString(); } + + public static boolean isEmpty(GatewayStartedShard gatewayStartedShard) { + return gatewayStartedShard.allocationId() == null + && gatewayStartedShard.primary() == false + && gatewayStartedShard.storeException() == null + && gatewayStartedShard.replicationCheckpoint() == null; + } } /** diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java index dc5d85b17bc32..89362988b4d85 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java @@ -8,7 +8,6 @@ package org.opensearch.gateway; -import org.opensearch.OpenSearchException; import org.opensearch.action.ActionType; import org.opensearch.action.FailedNodeException; import org.opensearch.action.support.ActionFilters; @@ -27,7 +26,6 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; -import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; import org.opensearch.indices.IndicesService; import org.opensearch.indices.store.ShardAttributes; import org.opensearch.threadpool.ThreadPool; @@ -40,6 +38,8 @@ import java.util.Map; import java.util.Objects; +import static org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; +import static org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.INDEX_NOT_FOUND; import static org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.getShardInfoOnLocalNode; /** @@ -136,8 +136,10 @@ protected NodesGatewayStartedShardsBatch newResponse( @Override protected NodeGatewayStartedShardsBatch nodeOperation(NodeRequest request) { Map shardsOnNode = new HashMap<>(); - for (ShardAttributes shardAttr : request.shardAttributes.values()) { - final ShardId shardId = shardAttr.getShardId(); + // NOTE : If we ever change this for loop to run in parallel threads, we should re-visit the exception + // handling in AsyncShardBatchFetch class. + for (Map.Entry shardAttr : request.shardAttributes.entrySet()) { + final ShardId shardId = shardAttr.getKey(); try { shardsOnNode.put( shardId, @@ -147,16 +149,19 @@ protected NodeGatewayStartedShardsBatch nodeOperation(NodeRequest request) { namedXContentRegistry, nodeEnv, indicesService, - shardAttr.getCustomDataPath(), + shardAttr.getValue().getCustomDataPath(), settings, clusterService ) ); } catch (Exception e) { - shardsOnNode.put( - shardId, - new GatewayStartedShard(null, false, null, new OpenSearchException("failed to load started shards", e)) - ); + // should return null in case of known exceptions being returned from getShardInfoOnLocalNode method. + if (e instanceof IllegalStateException || e.getMessage().contains(INDEX_NOT_FOUND) || e instanceof IOException) { + shardsOnNode.put(shardId, null); + } else { + // return actual exception as it is for unknown exceptions + shardsOnNode.put(shardId, new GatewayStartedShard(null, false, null, e)); + } } } return new NodeGatewayStartedShardsBatch(clusterService.localNode(), shardsOnNode); @@ -264,13 +269,26 @@ public Map getNodeGatewayStartedShardsBatch() { public NodeGatewayStartedShardsBatch(StreamInput in) throws IOException { super(in); - this.nodeGatewayStartedShardsBatch = in.readMap(ShardId::new, GatewayStartedShard::new); + this.nodeGatewayStartedShardsBatch = in.readMap(ShardId::new, i -> { + if (i.readBoolean()) { + return new GatewayStartedShard(i); + } else { + return null; + } + }); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeMap(nodeGatewayStartedShardsBatch, (o, k) -> k.writeTo(o), (o, v) -> v.writeTo(o)); + out.writeMap(nodeGatewayStartedShardsBatch, (o, k) -> k.writeTo(o), (o, v) -> { + if (v != null) { + o.writeBoolean(true); + v.writeTo(o); + } else { + o.writeBoolean(false); + } + }); } public NodeGatewayStartedShardsBatch(DiscoveryNode node, Map nodeGatewayStartedShardsBatch) { diff --git a/server/src/main/java/org/opensearch/indices/store/ShardAttributes.java b/server/src/main/java/org/opensearch/indices/store/ShardAttributes.java index 4ef4e91f7af8c..999acd02030ab 100644 --- a/server/src/main/java/org/opensearch/indices/store/ShardAttributes.java +++ b/server/src/main/java/org/opensearch/indices/store/ShardAttributes.java @@ -12,36 +12,27 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.index.shard.ShardId; -import org.opensearch.gateway.AsyncShardFetch; import java.io.IOException; /** - * This class contains information about the shard that needs to be sent as part of request in Transport Action implementing - * {@link AsyncShardFetch.Lister} to fetch shard information in async manner + * This class contains Attributes related to Shards that are necessary for making the + * {@link org.opensearch.gateway.TransportNodesListGatewayStartedShards} transport requests * * @opensearch.internal */ public class ShardAttributes implements Writeable { - private final ShardId shardId; @Nullable private final String customDataPath; - public ShardAttributes(ShardId shardId, String customDataPath) { - this.shardId = shardId; + public ShardAttributes(String customDataPath) { this.customDataPath = customDataPath; } public ShardAttributes(StreamInput in) throws IOException { - shardId = new ShardId(in); customDataPath = in.readString(); } - public ShardId getShardId() { - return shardId; - } - /** * Returns the custom data path that is used to look up information for this shard. * Returns an empty string if no custom data path is used for this index. @@ -53,7 +44,11 @@ public String getCustomDataPath() { } public void writeTo(StreamOutput out) throws IOException { - shardId.writeTo(out); out.writeString(customDataPath); } + + @Override + public String toString() { + return "ShardAttributes{" + ", customDataPath='" + customDataPath + '\'' + '}'; + } } diff --git a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java index 3f151fe1c5ca0..85d5bff4677ef 100644 --- a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java +++ b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java @@ -155,7 +155,7 @@ private Map listStoreMetadata(NodeRequest reque shardStoreMetadataMap.put(shardId, new NodeStoreFilesMetadata(storeFilesMetadata, null)); } catch (Exception e) { // should return null in case of known exceptions being returned from listShardMetadataInternal method. - if (e.getMessage().contains(INDEX_NOT_FOUND)) { + if (e.getMessage().contains(INDEX_NOT_FOUND) || e instanceof IOException) { shardStoreMetadataMap.put(shardId, null); } else { // return actual exception as it is for unknown exceptions @@ -276,6 +276,11 @@ public void writeTo(StreamOutput out) throws IOException { } } + boolean isEmpty(NodeStoreFilesMetadata response) { + return response.storeFilesMetadata() == null + || response.storeFilesMetadata().isEmpty() && response.getStoreFileFetchException() == null; + } + public Exception getStoreFileFetchException() { return storeFileFetchException; } diff --git a/server/src/test/java/org/opensearch/gateway/AsyncShardFetchTests.java b/server/src/test/java/org/opensearch/gateway/AsyncShardFetchTests.java index 4e5e9c71e1fe4..db97c3ece94ba 100644 --- a/server/src/test/java/org/opensearch/gateway/AsyncShardFetchTests.java +++ b/server/src/test/java/org/opensearch/gateway/AsyncShardFetchTests.java @@ -39,7 +39,6 @@ import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.core.index.shard.ShardId; -import org.opensearch.indices.store.ShardAttributes; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -47,7 +46,6 @@ import org.junit.Before; import java.util.Collections; -import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; @@ -86,16 +84,7 @@ public class AsyncShardFetchTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); this.threadPool = new TestThreadPool(getTestName()); - if (randomBoolean()) { - this.test = new TestFetch(threadPool); - } else { - HashMap shardToCustomDataPath = new HashMap<>(); - ShardId shardId0 = new ShardId("index1", "index_uuid1", 0); - ShardId shardId1 = new ShardId("index2", "index_uuid2", 0); - shardToCustomDataPath.put(shardId0, new ShardAttributes(shardId0, "")); - shardToCustomDataPath.put(shardId1, new ShardAttributes(shardId1, "")); - this.test = new TestFetch(threadPool, shardToCustomDataPath); - } + this.test = new TestFetch(threadPool); } @After @@ -414,11 +403,6 @@ static class Entry { this.threadPool = threadPool; } - TestFetch(ThreadPool threadPool, Map shardAttributesMap) { - super(LogManager.getLogger(TestFetch.class), "test", shardAttributesMap, null, "test-batch"); - this.threadPool = threadPool; - } - public void addSimulation(String nodeId, Response response) { simulations.put(nodeId, new Entry(response, null)); } diff --git a/server/src/test/java/org/opensearch/gateway/BatchTestUtil.java b/server/src/test/java/org/opensearch/gateway/BatchTestUtil.java new file mode 100644 index 0000000000000..69f0cfeeb2c7d --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/BatchTestUtil.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway; + +import org.opensearch.core.index.shard.ShardId; + +import java.util.ArrayList; +import java.util.List; + +public class BatchTestUtil { + public static List setUpShards(int numberOfShards) { + List shards = new ArrayList<>(); + for (int shardNumber = 0; shardNumber < numberOfShards; shardNumber++) { + ShardId shardId = new ShardId("test", "_na_", shardNumber); + shards.add(shardId); + } + return shards; + } +} diff --git a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java index 4796def2b8902..522ad2a64ea5d 100644 --- a/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/PrimaryShardBatchAllocatorTests.java @@ -29,6 +29,7 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.shard.ShardId; import org.opensearch.env.Environment; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; import org.opensearch.index.IndexSettings; import org.opensearch.index.codec.CodecService; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; @@ -289,14 +290,9 @@ public TestBatchAllocator addData( if (data == null) { data = new HashMap<>(); } - Map shardData = Map.of( + Map shardData = Map.of( shardId, - new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( - allocationId, - primary, - replicationCheckpoint, - storeException - ) + new GatewayStartedShard(allocationId, primary, replicationCheckpoint, storeException) ); data.put(node, new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch(node, shardData)); return this; @@ -313,16 +309,8 @@ public TestBatchAllocator addShardData( if (data == null) { data = new HashMap<>(); } - Map shardData = new HashMap<>(); - shardData.put( - shardId, - new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( - allocationId, - primary, - replicationCheckpoint, - storeException - ) - ); + Map shardData = new HashMap<>(); + shardData.put(shardId, new GatewayStartedShard(allocationId, primary, replicationCheckpoint, storeException)); if (data.get(node) != null) shardData.putAll(data.get(node).getNodeGatewayStartedShardsBatch()); data.put(node, new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch(node, shardData)); return this; diff --git a/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java b/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java new file mode 100644 index 0000000000000..1b42a31a4fd84 --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java @@ -0,0 +1,231 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway; + +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.TestShardRouting; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper.GatewayStartedShard; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch; +import org.opensearch.indices.store.ShardAttributes; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ShardBatchCacheTests extends OpenSearchAllocationTestCase { + private static final String BATCH_ID = "b1"; + private final DiscoveryNode node1 = newNode("node1"); + private final DiscoveryNode node2 = newNode("node2"); + // Needs to be enabled once ShardsBatchGatewayAllocator is pushed + // private final Map batchInfo = new HashMap<>(); + private AsyncShardBatchFetch.ShardBatchCache shardCache; + private List shardsInBatch = new ArrayList<>(); + private static final int NUMBER_OF_SHARDS_DEFAULT = 10; + + private enum ResponseType { + NULL, + EMPTY, + FAILURE, + VALID + } + + public void setupShardBatchCache(String batchId, int numberOfShards) { + Map shardAttributesMap = new HashMap<>(); + fillShards(shardAttributesMap, numberOfShards); + this.shardCache = new AsyncShardBatchFetch.ShardBatchCache<>( + logger, + "batch_shards_started", + shardAttributesMap, + "BatchID=[" + batchId + "]", + GatewayStartedShard.class, + new GatewayStartedShard(null, false, null, null), + GatewayStartedShard::isEmpty, + new ShardBatchResponseFactory<>(true) + ); + } + + public void testClearShardCache() { + setupShardBatchCache(BATCH_ID, NUMBER_OF_SHARDS_DEFAULT); + ShardId shard = shardsInBatch.iterator().next(); + this.shardCache.initData(node1); + this.shardCache.markAsFetching(List.of(node1.getId()), 1); + this.shardCache.putData(node1, new NodeGatewayStartedShardsBatch(node1, getPrimaryResponse(shardsInBatch, ResponseType.EMPTY))); + assertTrue( + this.shardCache.getCacheData(DiscoveryNodes.builder().add(node1).build(), null) + .get(node1) + .getNodeGatewayStartedShardsBatch() + .containsKey(shard) + ); + this.shardCache.deleteShard(shard); + assertFalse( + this.shardCache.getCacheData(DiscoveryNodes.builder().add(node1).build(), null) + .get(node1) + .getNodeGatewayStartedShardsBatch() + .containsKey(shard) + ); + } + + public void testGetCacheData() { + setupShardBatchCache(BATCH_ID, NUMBER_OF_SHARDS_DEFAULT); + ShardId shard = shardsInBatch.iterator().next(); + this.shardCache.initData(node1); + this.shardCache.initData(node2); + this.shardCache.markAsFetching(List.of(node1.getId(), node2.getId()), 1); + this.shardCache.putData(node1, new NodeGatewayStartedShardsBatch(node1, getPrimaryResponse(shardsInBatch, ResponseType.EMPTY))); + assertTrue( + this.shardCache.getCacheData(DiscoveryNodes.builder().add(node1).build(), null) + .get(node1) + .getNodeGatewayStartedShardsBatch() + .containsKey(shard) + ); + assertTrue( + this.shardCache.getCacheData(DiscoveryNodes.builder().add(node2).build(), null) + .get(node2) + .getNodeGatewayStartedShardsBatch() + .isEmpty() + ); + } + + public void testInitCacheData() { + setupShardBatchCache(BATCH_ID, NUMBER_OF_SHARDS_DEFAULT); + this.shardCache.initData(node1); + this.shardCache.initData(node2); + assertEquals(2, shardCache.getCache().size()); + + // test getData without fetch + assertTrue(shardCache.getData(node1).getNodeGatewayStartedShardsBatch().isEmpty()); + } + + public void testPutData() { + // test empty and non-empty responses + setupShardBatchCache(BATCH_ID, NUMBER_OF_SHARDS_DEFAULT); + ShardId shard = shardsInBatch.iterator().next(); + this.shardCache.initData(node1); + this.shardCache.initData(node2); + this.shardCache.markAsFetching(List.of(node1.getId(), node2.getId()), 1); + this.shardCache.putData(node1, new NodeGatewayStartedShardsBatch(node1, getPrimaryResponse(shardsInBatch, ResponseType.VALID))); + this.shardCache.putData(node2, new NodeGatewayStartedShardsBatch(node1, getPrimaryResponse(shardsInBatch, ResponseType.EMPTY))); + + // assert that fetching is done as both node's responses are stored in cache + assertFalse(this.shardCache.getCache().get(node1.getId()).isFetching()); + assertFalse(this.shardCache.getCache().get(node2.getId()).isFetching()); + + Map fetchData = shardCache.getCacheData( + DiscoveryNodes.builder().add(node1).add(node2).build(), + null + ); + assertEquals(2, fetchData.size()); + assertEquals(10, fetchData.get(node1).getNodeGatewayStartedShardsBatch().size()); + assertEquals("alloc-1", fetchData.get(node1).getNodeGatewayStartedShardsBatch().get(shard).allocationId()); + + assertEquals(10, fetchData.get(node2).getNodeGatewayStartedShardsBatch().size()); + assertTrue(GatewayStartedShard.isEmpty(fetchData.get(node2).getNodeGatewayStartedShardsBatch().get(shard))); + + // test GetData after fetch + assertEquals(10, shardCache.getData(node1).getNodeGatewayStartedShardsBatch().size()); + } + + public void testNullResponses() { + setupShardBatchCache(BATCH_ID, NUMBER_OF_SHARDS_DEFAULT); + this.shardCache.initData(node1); + this.shardCache.markAsFetching(List.of(node1.getId()), 1); + this.shardCache.putData(node1, new NodeGatewayStartedShardsBatch(node1, getPrimaryResponse(shardsInBatch, ResponseType.NULL))); + + Map fetchData = shardCache.getCacheData( + DiscoveryNodes.builder().add(node1).build(), + null + ); + assertTrue(fetchData.get(node1).getNodeGatewayStartedShardsBatch().isEmpty()); + } + + public void testShardsDataWithException() { + setupShardBatchCache(BATCH_ID, NUMBER_OF_SHARDS_DEFAULT); + this.shardCache.initData(node1); + this.shardCache.initData(node2); + this.shardCache.markAsFetching(List.of(node1.getId(), node2.getId()), 1); + this.shardCache.putData(node1, new NodeGatewayStartedShardsBatch(node1, getFailedPrimaryResponse(shardsInBatch, 5))); + Map fetchData = shardCache.getCacheData( + DiscoveryNodes.builder().add(node1).add(node2).build(), + null + ); + + // assertEquals(5, batchInfo.size()); + assertEquals(2, fetchData.size()); + assertEquals(10, fetchData.get(node1).getNodeGatewayStartedShardsBatch().size()); + assertTrue(fetchData.get(node2).getNodeGatewayStartedShardsBatch().isEmpty()); + } + + private Map getPrimaryResponse(List shards, ResponseType responseType) { + int allocationId = 1; + Map shardData = new HashMap<>(); + for (ShardId shard : shards) { + switch (responseType) { + case NULL: + shardData.put(shard, null); + break; + case EMPTY: + shardData.put(shard, new GatewayStartedShard(null, false, null, null)); + break; + case VALID: + shardData.put(shard, new GatewayStartedShard("alloc-" + allocationId++, false, null, null)); + break; + default: + throw new AssertionError("unknown response type"); + } + } + return shardData; + } + + private Map getFailedPrimaryResponse(List shards, int failedShardsCount) { + int allocationId = 1; + Map shardData = new HashMap<>(); + for (ShardId shard : shards) { + if (failedShardsCount-- > 0) { + shardData.put( + shard, + new GatewayStartedShard("alloc-" + allocationId++, false, null, new OpenSearchRejectedExecutionException()) + ); + } else { + shardData.put(shard, new GatewayStartedShard("alloc-" + allocationId++, false, null, null)); + } + } + return shardData; + } + + private void fillShards(Map shardAttributesMap, int numberOfShards) { + shardsInBatch = BatchTestUtil.setUpShards(numberOfShards); + for (ShardId shardId : shardsInBatch) { + ShardAttributes attr = new ShardAttributes(""); + shardAttributesMap.put(shardId, attr); + // batchInfo.put( + // shardId, + // new ShardsBatchGatewayAllocator.ShardEntry(attr, randomShardRouting(shardId.getIndexName(), shardId.id())) + // ); + } + } + + private ShardRouting randomShardRouting(String index, int shard) { + ShardRoutingState state = randomFrom(ShardRoutingState.values()); + return TestShardRouting.newShardRouting( + index, + shard, + state == ShardRoutingState.UNASSIGNED ? null : "1", + state == ShardRoutingState.RELOCATING ? "2" : null, + state != ShardRoutingState.UNASSIGNED && randomBoolean(), + state + ); + } +} diff --git a/server/src/test/java/org/opensearch/indices/store/ShardAttributesTests.java b/server/src/test/java/org/opensearch/indices/store/ShardAttributesTests.java index 7fa95fefe72fd..94834bab1d98b 100644 --- a/server/src/test/java/org/opensearch/indices/store/ShardAttributesTests.java +++ b/server/src/test/java/org/opensearch/indices/store/ShardAttributesTests.java @@ -28,13 +28,12 @@ public class ShardAttributesTests extends OpenSearchTestCase { String customDataPath = "/path/to/data"; public void testShardAttributesConstructor() { - ShardAttributes attributes = new ShardAttributes(shardId, customDataPath); - assertEquals(attributes.getShardId(), shardId); + ShardAttributes attributes = new ShardAttributes(customDataPath); assertEquals(attributes.getCustomDataPath(), customDataPath); } public void testSerialization() throws IOException { - ShardAttributes attributes1 = new ShardAttributes(shardId, customDataPath); + ShardAttributes attributes1 = new ShardAttributes(customDataPath); ByteArrayOutputStream bytes = new ByteArrayOutputStream(); StreamOutput output = new DataOutputStreamOutput(new DataOutputStream(bytes)); attributes1.writeTo(output); @@ -42,7 +41,6 @@ public void testSerialization() throws IOException { StreamInput input = new InputStreamStreamInput(new ByteArrayInputStream(bytes.toByteArray())); ShardAttributes attributes2 = new ShardAttributes(input); input.close(); - assertEquals(attributes1.getShardId(), attributes2.getShardId()); assertEquals(attributes1.getCustomDataPath(), attributes2.getCustomDataPath()); } From 5d939b9204d1b90e257c8700a93ee542d6c4fe14 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 11 Apr 2024 14:24:18 -0400 Subject: [PATCH 2/6] [BUG] Fix org.opensearch.common.xcontent.XContentParserTests.testString flaky test (#13164) Signed-off-by: Andriy Redko --- .../org/opensearch/common/xcontent/XContentParserTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/x-content/src/test/java/org/opensearch/common/xcontent/XContentParserTests.java b/libs/x-content/src/test/java/org/opensearch/common/xcontent/XContentParserTests.java index 81a2b0e290121..16816a03d742d 100644 --- a/libs/x-content/src/test/java/org/opensearch/common/xcontent/XContentParserTests.java +++ b/libs/x-content/src/test/java/org/opensearch/common/xcontent/XContentParserTests.java @@ -85,8 +85,8 @@ public class XContentParserTests extends OpenSearchTestCase { () -> randomAlphaOfLengthBetween(1, SmileXContent.DEFAULT_MAX_STRING_LEN), /* YAML parser limitation */ XContentType.YAML, - /* use 75% of the limit, difficult to get the exact size of the content right */ - () -> randomRealisticUnicodeOfCodepointLengthBetween(1, (int) (YamlXContent.DEFAULT_CODEPOINT_LIMIT * 0.75)) + /* use 50% of the limit, difficult to get the exact size of the content right */ + () -> randomRealisticUnicodeOfCodepointLengthBetween(1, (int) (YamlXContent.DEFAULT_CODEPOINT_LIMIT * 0.50)) ); private static final Map> OFF_LIMIT_GENERATORS = Map.of( From 52ce070f0924e66a0f8e400cc76fa98a3b87d75b Mon Sep 17 00:00:00 2001 From: Sagar <99425694+sgup432@users.noreply.github.com> Date: Thu, 11 Apr 2024 14:49:03 -0800 Subject: [PATCH 3/6] [Tiered Caching] Make took time policy dynamic and add additional integ tests (#13063) --------- Signed-off-by: Sagar Upadhyaya Signed-off-by: Sagar <99425694+sgup432@users.noreply.github.com> --- CHANGELOG.md | 1 + .../TieredSpilloverCacheIT.java | 322 +++++++++++++++++- .../cache/common/policy/TookTimePolicy.java | 20 +- .../common/tier/TieredSpilloverCache.java | 84 ++++- .../tier/TieredSpilloverCachePlugin.java | 8 +- .../tier/TieredSpilloverCacheSettings.java | 30 +- .../common/policy/TookTimePolicyTests.java | 18 +- .../cache/common/tier/MockDiskCache.java | 48 ++- .../tier/TieredSpilloverCacheTests.java | 24 +- .../cache/store/config/CacheConfig.java | 14 + .../common/settings/ClusterSettings.java | 11 +- .../indices/IndicesRequestCache.java | 5 +- .../opensearch/indices/IndicesService.java | 2 +- .../indices/IndicesRequestCacheTests.java | 67 +++- 14 files changed, 600 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 492da15def01b..f8b3e10a37a37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow setting KEYSTORE_PASSWORD through env variable ([#12865](https://github.com/opensearch-project/OpenSearch/pull/12865)) - [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) - [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954)) +- [Tiered Caching] Make took time caching policy setting dynamic ([#13063](https://github.com/opensearch-project/OpenSearch/pull/13063)) - Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569)) - Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044)) - Add cluster primary balance contraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656)) diff --git a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java b/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java index 568ac4d188c51..977a66c53b7e8 100644 --- a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java +++ b/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java @@ -12,21 +12,31 @@ import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; import org.opensearch.action.admin.cluster.node.info.PluginsAndModules; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; +import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheResponse; +import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.search.SearchType; import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; import org.opensearch.common.cache.settings.CacheSettings; import org.opensearch.common.cache.store.OpenSearchOnHeapCache; +import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.cache.request.RequestCacheStats; +import org.opensearch.index.query.QueryBuilders; import org.opensearch.indices.IndicesRequestCache; import org.opensearch.plugins.CachePlugin; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.PluginInfo; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.hamcrest.OpenSearchAssertions; import org.junit.Assert; import java.time.ZoneId; @@ -34,15 +44,20 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings.MAXIMUM_SIZE_IN_BYTES_KEY; +import static org.opensearch.indices.IndicesService.INDICES_CACHE_CLEAN_INTERVAL_SETTING; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.greaterThan; +@OpenSearchIntegTestCase.ClusterScope(numDataNodes = 0, scope = OpenSearchIntegTestCase.Scope.TEST) public class TieredSpilloverCacheIT extends OpenSearchIntegTestCase { @Override @@ -50,15 +65,9 @@ protected Collection> nodePlugins() { return Arrays.asList(TieredSpilloverCachePlugin.class, MockDiskCachePlugin.class); } - @Override - protected Settings featureFlagSettings() { - return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.PLUGGABLE_CACHE, "true").build(); - } - - @Override - protected Settings nodeSettings(int nodeOrdinal) { + private Settings defaultSettings(String onHeapCacheSizeInBytesOrPecentage) { return Settings.builder() - .put(super.nodeSettings(nodeOrdinal)) + .put(FeatureFlags.PLUGGABLE_CACHE, "true") .put( CacheSettings.getConcreteStoreNameSettingForCacheType(CacheType.INDICES_REQUEST_CACHE).getKey(), TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME @@ -75,10 +84,17 @@ protected Settings nodeSettings(int nodeOrdinal) { ).getKey(), MockDiskCache.MockDiskCacheFactory.NAME ) + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSizeInBytesOrPecentage + ) .build(); } public void testPluginsAreInstalled() { + internalCluster().startNode(Settings.builder().put(defaultSettings("1%")).build()); NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(); nodesInfoRequest.addMetric(NodesInfoRequest.Metric.PLUGINS.metricName()); NodesInfoResponse nodesInfoResponse = OpenSearchIntegTestCase.client().admin().cluster().nodesInfo(nodesInfoRequest).actionGet(); @@ -90,11 +106,12 @@ public void testPluginsAreInstalled() { .collect(Collectors.toList()); Assert.assertTrue( pluginInfos.stream() - .anyMatch(pluginInfo -> pluginInfo.getName().equals("org.opensearch.cache.common" + ".tier.TieredSpilloverCachePlugin")) + .anyMatch(pluginInfo -> pluginInfo.getName().equals("org.opensearch.cache.common.tier.TieredSpilloverCachePlugin")) ); } public void testSanityChecksWithIndicesRequestCache() throws InterruptedException { + internalCluster().startNodes(3, Settings.builder().put(defaultSettings("1%")).build()); Client client = client(); assertAcked( client.admin() @@ -118,10 +135,7 @@ public void testSanityChecksWithIndicesRequestCache() throws InterruptedExceptio .setSize(0) .setSearchType(SearchType.QUERY_THEN_FETCH) .addAggregation( - dateHistogram("histo").field("f") - .timeZone(ZoneId.of("+01:00")) - .minDocCount(0) - .dateHistogramInterval(DateHistogramInterval.MONTH) + dateHistogram("histo").field("f").timeZone(ZoneId.of("+01:00")).minDocCount(0).calendarInterval(DateHistogramInterval.MONTH) ) .get(); assertSearchResponse(r1); @@ -133,6 +147,288 @@ public void testSanityChecksWithIndicesRequestCache() throws InterruptedExceptio ); } + public void testWithDynamicTookTimePolicy() throws Exception { + int onHeapCacheSizeInBytes = 2000; + internalCluster().startNode(Settings.builder().put(defaultSettings(onHeapCacheSizeInBytes + "b")).build()); + Client client = client(); + assertAcked( + client.admin() + .indices() + .prepareCreate("index") + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.refresh_interval", -1) + ) + .get() + ); + // Step 1 : Set a very high value for took time policy so that no items evicted from onHeap cache are spilled + // to disk. And then hit requests so that few items are cached into cache. + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(100, TimeUnit.SECONDS) + ) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + int numberOfIndexedItems = randomIntBetween(6, 10); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + indexRandom(true, client.prepareIndex("index").setSource("k" + iterator, "hello" + iterator)); + } + ensureSearchable("index"); + refreshAndWaitForReplication(); + // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); + long perQuerySizeInCacheInBytes = -1; + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + if (perQuerySizeInCacheInBytes == -1) { + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + perQuerySizeInCacheInBytes = requestCacheStats.getMemorySizeInBytes(); + } + assertSearchResponse(resp); + } + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + // Considering disk cache won't be used due to took time policy having a high value, we expect overall cache + // size to be less than or equal to onHeapCache size. + assertTrue(requestCacheStats.getMemorySizeInBytes() <= onHeapCacheSizeInBytes); + long entriesInCache = requestCacheStats.getMemorySizeInBytes() / perQuerySizeInCacheInBytes; + // All should be misses in the first attempt + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(numberOfIndexedItems - entriesInCache, requestCacheStats.getEvictions()); + assertEquals(0, requestCacheStats.getHitCount()); + + // Step 2: Again hit same set of queries as above, we still won't see any hits as items keeps getting evicted. + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + // We still won't get any hits as items keep getting evicted in LRU fashion due to limited cache size. + assertTrue(requestCacheStats.getMemorySizeInBytes() <= onHeapCacheSizeInBytes); + assertEquals(numberOfIndexedItems * 2, requestCacheStats.getMissCount()); + assertEquals(numberOfIndexedItems * 2 - entriesInCache, requestCacheStats.getEvictions()); + assertEquals(0, requestCacheStats.getHitCount()); + long lastEvictionSeen = requestCacheStats.getEvictions(); + + // Step 3: Decrease took time policy to zero so that disk cache also comes into play. Now we should be able + // to cache all entries. + updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.MILLISECONDS) + ) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + // All entries should get cached. + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); + // No more evictions seen when compared with last step. + assertEquals(0, requestCacheStats.getEvictions() - lastEvictionSeen); + // Hit count should be equal to number of cache entries present in previous step. + assertEquals(entriesInCache, requestCacheStats.getHitCount()); + assertEquals(numberOfIndexedItems * 3 - entriesInCache, requestCacheStats.getMissCount()); + long lastHitCountSeen = requestCacheStats.getHitCount(); + long lastMissCountSeen = requestCacheStats.getMissCount(); + + // Step 4: Again hit the same requests, we should get hits for all entries. + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + // All entries should get cached. + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); + // No more evictions seen when compared with last step. + assertEquals(0, requestCacheStats.getEvictions() - lastEvictionSeen); + assertEquals(lastHitCountSeen + numberOfIndexedItems, requestCacheStats.getHitCount()); + assertEquals(0, lastMissCountSeen - requestCacheStats.getMissCount()); + } + + public void testInvalidationWithIndicesRequestCache() throws Exception { + int onHeapCacheSizeInBytes = 2000; + internalCluster().startNode( + Settings.builder() + .put(defaultSettings(onHeapCacheSizeInBytes + "b")) + .put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1)) + .build() + ); + Client client = client(); + assertAcked( + client.admin() + .indices() + .prepareCreate("index") + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.refresh_interval", -1) + ) + .get() + ); + // Update took time policy to zero so that all entries are eligible to be cached on disk. + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.MILLISECONDS) + ) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + int numberOfIndexedItems = randomIntBetween(5, 10); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + indexRandom(true, client.prepareIndex("index").setSource("k" + iterator, "hello" + iterator)); + } + ensureSearchable("index"); + refreshAndWaitForReplication(); + // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); + long perQuerySizeInCacheInBytes = -1; + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + if (perQuerySizeInCacheInBytes == -1) { + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + perQuerySizeInCacheInBytes = requestCacheStats.getMemorySizeInBytes(); + } + assertSearchResponse(resp); + } + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(0, requestCacheStats.getHitCount()); + assertEquals(0, requestCacheStats.getEvictions()); + assertEquals(perQuerySizeInCacheInBytes * numberOfIndexedItems, requestCacheStats.getMemorySizeInBytes()); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = client.admin().indices().prepareStats("index").setRequestCache(true).get().getTotal().getRequestCache(); + assertEquals(numberOfIndexedItems, requestCacheStats.getHitCount()); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(perQuerySizeInCacheInBytes * numberOfIndexedItems, requestCacheStats.getMemorySizeInBytes()); + assertEquals(0, requestCacheStats.getEvictions()); + // Explicit refresh would invalidate cache entries. + refreshAndWaitForReplication(); + assertBusy(() -> { + // Explicit refresh should clear up cache entries + assertTrue(getRequestCacheStats(client, "index").getMemorySizeInBytes() == 0); + }, 1, TimeUnit.SECONDS); + requestCacheStats = client.admin().indices().prepareStats("index").setRequestCache(true).get().getTotal().getRequestCache(); + assertEquals(0, requestCacheStats.getMemorySizeInBytes()); + // Hits and misses stats shouldn't get cleared up. + assertEquals(numberOfIndexedItems, requestCacheStats.getHitCount()); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + } + + public void testWithExplicitCacheClear() throws Exception { + int onHeapCacheSizeInBytes = 2000; + internalCluster().startNode( + Settings.builder() + .put(defaultSettings(onHeapCacheSizeInBytes + "b")) + .put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1)) + .build() + ); + Client client = client(); + assertAcked( + client.admin() + .indices() + .prepareCreate("index") + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.refresh_interval", -1) + ) + .get() + ); + // Update took time policy to zero so that all entries are eligible to be cached on disk. + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.MILLISECONDS) + ) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + int numberOfIndexedItems = randomIntBetween(5, 10); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + indexRandom(true, client.prepareIndex("index").setSource("k" + iterator, "hello" + iterator)); + } + ensureSearchable("index"); + refreshAndWaitForReplication(); + // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); + + long perQuerySizeInCacheInBytes = -1; + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + if (perQuerySizeInCacheInBytes == -1) { + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + perQuerySizeInCacheInBytes = requestCacheStats.getMemorySizeInBytes(); + } + assertSearchResponse(resp); + } + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(0, requestCacheStats.getHitCount()); + assertEquals(0, requestCacheStats.getEvictions()); + assertEquals(perQuerySizeInCacheInBytes * numberOfIndexedItems, requestCacheStats.getMemorySizeInBytes()); + + // Explicit clear the cache. + ClearIndicesCacheRequest request = new ClearIndicesCacheRequest("index"); + ClearIndicesCacheResponse response = client.admin().indices().clearCache(request).get(); + assertNoFailures(response); + + assertBusy(() -> { + // All entries should get cleared up. + assertTrue(getRequestCacheStats(client, "index").getMemorySizeInBytes() == 0); + }, 1, TimeUnit.SECONDS); + } + + private RequestCacheStats getRequestCacheStats(Client client, String indexName) { + return client.admin().indices().prepareStats(indexName).setRequestCache(true).get().getTotal().getRequestCache(); + } + public static class MockDiskCachePlugin extends Plugin implements CachePlugin { public MockDiskCachePlugin() {} diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/policy/TookTimePolicy.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/policy/TookTimePolicy.java index 96ef027c17187..4bc26803acf4c 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/policy/TookTimePolicy.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/policy/TookTimePolicy.java @@ -13,12 +13,16 @@ package org.opensearch.cache.common.policy; +import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.policy.CachedQueryResult; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.unit.TimeValue; import java.util.function.Function; import java.util.function.Predicate; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; + /** * A cache tier policy which accepts queries whose took time is greater than some threshold. * The threshold should be set to approximately the time it takes to get a result from the cache tier. @@ -30,7 +34,7 @@ public class TookTimePolicy implements Predicate { /** * The minimum took time to allow a query. Set to TimeValue.ZERO to let all data through. */ - private final TimeValue threshold; + private TimeValue threshold; /** * Function which extracts the relevant PolicyValues from a serialized CachedQueryResult @@ -41,13 +45,25 @@ public class TookTimePolicy implements Predicate { * Constructs a took time policy. * @param threshold the threshold * @param cachedResultParser the function providing policy values + * @param clusterSettings cluster settings + * @param cacheType cache type */ - public TookTimePolicy(TimeValue threshold, Function cachedResultParser) { + public TookTimePolicy( + TimeValue threshold, + Function cachedResultParser, + ClusterSettings clusterSettings, + CacheType cacheType + ) { if (threshold.compareTo(TimeValue.ZERO) < 0) { throw new IllegalArgumentException("Threshold for TookTimePolicy must be >= 0ms but was " + threshold.getStringRep()); } this.threshold = threshold; this.cachedResultParser = cachedResultParser; + clusterSettings.addSettingsUpdateConsumer(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(cacheType), this::setThreshold); + } + + private void setThreshold(TimeValue threshold) { + this.threshold = threshold; } /** diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java index 00a8eec93acc9..cab05732ba1c4 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java @@ -15,19 +15,21 @@ import org.opensearch.common.cache.LoadAwareCacheLoader; import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; +import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.ReleasableLock; -import org.opensearch.common.util.iterable.Iterables; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Objects; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -47,6 +49,9 @@ @ExperimentalApi public class TieredSpilloverCache implements ICache { + // Used to avoid caching stale entries in lower tiers. + private static final List SPILLOVER_REMOVAL_REASONS = List.of(RemovalReason.EVICTED, RemovalReason.CAPACITY); + private final ICache diskCache; private final ICache onHeapCache; private final RemovalListener removalListener; @@ -69,8 +74,11 @@ public class TieredSpilloverCache implements ICache { @Override public void onRemoval(RemovalNotification notification) { try (ReleasableLock ignore = writeLock.acquire()) { - if (evaluatePolicies(notification.getValue())) { + if (SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()) + && evaluatePolicies(notification.getValue())) { diskCache.put(notification.getKey(), notification.getValue()); + } else { + removalListener.onRemoval(notification); } } } @@ -81,6 +89,7 @@ public void onRemoval(RemovalNotification notification) { .setWeigher(builder.cacheConfig.getWeigher()) .setMaxSizeInBytes(builder.cacheConfig.getMaxSizeInBytes()) .setExpireAfterAccess(builder.cacheConfig.getExpireAfterAccess()) + .setClusterSettings(builder.cacheConfig.getClusterSettings()) .build(), builder.cacheType, builder.cacheFactories @@ -156,10 +165,11 @@ public void invalidateAll() { * Provides an iteration over both onHeap and disk keys. This is not protected from any mutations to the cache. * @return An iterable over (onHeap + disk) keys */ - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked" }) @Override public Iterable keys() { - return Iterables.concat(onHeapCache.keys(), diskCache.keys()); + Iterable[] iterables = (Iterable[]) new Iterable[] { onHeapCache.keys(), diskCache.keys() }; + return new ConcatenatedIterables(iterables); } @Override @@ -213,6 +223,67 @@ boolean evaluatePolicies(V value) { return true; } + /** + * ConcatenatedIterables which combines cache iterables and supports remove() functionality as well if underlying + * iterator supports it. + * @param Type of key. + */ + static class ConcatenatedIterables implements Iterable { + + final Iterable[] iterables; + + ConcatenatedIterables(Iterable[] iterables) { + this.iterables = iterables; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public Iterator iterator() { + Iterator[] iterators = (Iterator[]) new Iterator[iterables.length]; + for (int i = 0; i < iterables.length; i++) { + iterators[i] = iterables[i].iterator(); + } + return new ConcatenatedIterator<>(iterators); + } + + static class ConcatenatedIterator implements Iterator { + private final Iterator[] iterators; + private int currentIteratorIndex; + private Iterator currentIterator; + + public ConcatenatedIterator(Iterator[] iterators) { + this.iterators = iterators; + this.currentIteratorIndex = 0; + this.currentIterator = iterators[currentIteratorIndex]; + } + + @Override + public boolean hasNext() { + while (!currentIterator.hasNext()) { + currentIteratorIndex++; + if (currentIteratorIndex == iterators.length) { + return false; + } + currentIterator = iterators[currentIteratorIndex]; + } + return true; + } + + @Override + public T next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return currentIterator.next(); + } + + @Override + public void remove() { + currentIterator.remove(); + } + } + } + /** * Factory to create TieredSpilloverCache objects. */ @@ -253,8 +324,7 @@ public ICache create(CacheConfig config, CacheType cacheType, } ICache.Factory diskCacheFactory = cacheFactories.get(diskCacheStoreName); - TimeValue diskPolicyThreshold = TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD - .getConcreteSettingForNamespace(cacheType.getSettingPrefix()) + TimeValue diskPolicyThreshold = TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(cacheType) .get(settings); Function cachedResultParser = Objects.requireNonNull( config.getCachedResultParser(), @@ -266,7 +336,7 @@ public ICache create(CacheConfig config, CacheType cacheType, .setRemovalListener(config.getRemovalListener()) .setCacheConfig(config) .setCacheType(cacheType) - .addPolicy(new TookTimePolicy(diskPolicyThreshold, cachedResultParser)) + .addPolicy(new TookTimePolicy(diskPolicyThreshold, cachedResultParser, config.getClusterSettings(), cacheType)) .build(); } diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java index 0cc8a711faaf5..dfd40199d859e 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java @@ -18,6 +18,8 @@ import java.util.List; import java.util.Map; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; + /** * Plugin for TieredSpilloverCache. */ @@ -51,11 +53,7 @@ public List> getSettings() { settingList.add( TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_STORE_NAME.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) ); - settingList.add( - TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD.getConcreteSettingForNamespace( - cacheType.getSettingPrefix() - ) - ); + settingList.add(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(cacheType)); } return settingList; } diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java index 684307960b8a5..b89e8c517a351 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java @@ -8,9 +8,12 @@ package org.opensearch.cache.common.tier; +import org.opensearch.common.cache.CacheType; import org.opensearch.common.settings.Setting; import org.opensearch.common.unit.TimeValue; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.TimeUnit; import static org.opensearch.common.settings.Setting.Property.NodeScope; @@ -42,17 +45,36 @@ public class TieredSpilloverCacheSettings { /** * Setting defining the minimum took time for a query to be allowed into the disk cache. */ - public static final Setting.AffixSetting TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD = Setting.suffixKeySetting( + private static final Setting.AffixSetting TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD = Setting.suffixKeySetting( TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME + ".disk.store.policies.took_time.threshold", (key) -> Setting.timeSetting( key, new TimeValue(10, TimeUnit.MILLISECONDS), // Default value for this setting TimeValue.ZERO, // Minimum value for this setting - NodeScope + NodeScope, + Setting.Property.Dynamic ) ); - // 10 ms was chosen as a safe value based on proof of concept, where we saw disk latencies in this range. - // Will be tuned further with future benchmarks. + + /** + * Stores took time policy settings for various cache types as these are dynamic so that can be registered and + * retrieved accordingly. + */ + public static final Map> TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; + + /** + * Fetches concrete took time policy settings. + */ + static { + Map> concreteTookTimePolicySettingMap = new HashMap<>(); + for (CacheType cacheType : CacheType.values()) { + concreteTookTimePolicySettingMap.put( + cacheType, + TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) + ); + } + TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP = concreteTookTimePolicySettingMap; + } /** * Default constructor diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/policy/TookTimePolicyTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/policy/TookTimePolicyTests.java index 237c9c7b79db4..000067280e50d 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/policy/TookTimePolicyTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/policy/TookTimePolicyTests.java @@ -12,20 +12,27 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHits; import org.opensearch.common.Randomness; +import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.search.TopDocsAndMaxScore; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.search.DocValueFormat; import org.opensearch.search.query.QuerySearchResult; import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; import java.io.IOException; +import java.util.HashSet; import java.util.Random; import java.util.function.Function; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; + public class TookTimePolicyTests extends OpenSearchTestCase { private final Function transformationFunction = (data) -> { try { @@ -35,8 +42,17 @@ public class TookTimePolicyTests extends OpenSearchTestCase { } }; + private ClusterSettings clusterSettings; + + @Before + public void setup() { + Settings settings = Settings.EMPTY; + clusterSettings = new ClusterSettings(settings, new HashSet<>()); + clusterSettings.registerSetting(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE)); + } + private TookTimePolicy getTookTimePolicy(TimeValue threshold) { - return new TookTimePolicy<>(threshold, transformationFunction); + return new TookTimePolicy<>(threshold, transformationFunction, clusterSettings, CacheType.INDICES_REQUEST_CACHE); } public void testTookTimePolicy() throws Exception { diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java index d8a6eb480a5a5..548c5d846dda5 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/MockDiskCache.java @@ -18,7 +18,9 @@ import org.opensearch.common.cache.store.builders.ICacheBuilder; import org.opensearch.common.cache.store.config.CacheConfig; +import java.util.Iterator; import java.util.Map; +import java.util.NoSuchElementException; import java.util.concurrent.ConcurrentHashMap; public class MockDiskCache implements ICache { @@ -79,7 +81,7 @@ public void invalidateAll() { @Override public Iterable keys() { - return this.cache.keySet(); + return () -> new CacheKeyIterator<>(cache, removalListener); } @Override @@ -156,4 +158,48 @@ public Builder setValueSerializer(Serializer valueSerializer) { } } + + /** + * Provides a iterator over keys. + * @param Type of key + * @param Type of value + */ + static class CacheKeyIterator implements Iterator { + private final Iterator> entryIterator; + private final Map cache; + private final RemovalListener removalListener; + private K currentKey; + + public CacheKeyIterator(Map cache, RemovalListener removalListener) { + this.entryIterator = cache.entrySet().iterator(); + this.removalListener = removalListener; + this.cache = cache; + } + + @Override + public boolean hasNext() { + return entryIterator.hasNext(); + } + + @Override + public K next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + Map.Entry entry = entryIterator.next(); + currentKey = entry.getKey(); + return currentKey; + } + + @Override + public void remove() { + if (currentKey == null) { + throw new IllegalStateException("No element to remove"); + } + V value = cache.get(currentKey); + cache.remove(currentKey); + this.removalListener.onRemoval(new RemovalNotification<>(currentKey, value, RemovalReason.INVALIDATED)); + currentKey = null; + } + } } diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java index b132952834f06..431aca51099a6 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java @@ -19,14 +19,17 @@ import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; import org.opensearch.common.metrics.CounterMetric; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.UUID; @@ -38,10 +41,20 @@ import java.util.function.Function; import java.util.function.Predicate; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; import static org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings.MAXIMUM_SIZE_IN_BYTES_KEY; public class TieredSpilloverCacheTests extends OpenSearchTestCase { + private ClusterSettings clusterSettings; + + @Before + public void setup() { + Settings settings = Settings.EMPTY; + clusterSettings = new ClusterSettings(settings, new HashSet<>()); + clusterSettings.registerSetting(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE)); + } + public void testComputeIfAbsentWithoutAnyOnHeapCacheEviction() throws Exception { int onHeapCacheSize = randomIntBetween(10, 30); int keyValueSize = 50; @@ -134,6 +147,7 @@ public void testComputeIfAbsentWithFactoryBasedCacheCreation() throws Exception .setSettings(settings) .setCachedResultParser(s -> new CachedQueryResult.PolicyValues(20_000_000L)) // Values will always appear to have taken // 20_000_000 ns = 20 ms to compute + .setClusterSettings(clusterSettings) .build(), CacheType.INDICES_REQUEST_CACHE, Map.of( @@ -959,9 +973,7 @@ public void testTookTimePolicyFromFactory() throws Exception { onHeapCacheSize * keyValueSize + "b" ) .put( - TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD.getConcreteSettingForNamespace( - CacheType.INDICES_REQUEST_CACHE.getSettingPrefix() - ).getKey(), + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), new TimeValue(timeValueThresholdNanos / 1_000_000) ) .build(); @@ -979,6 +991,7 @@ public CachedQueryResult.PolicyValues apply(String s) { return new CachedQueryResult.PolicyValues(tookTimeMap.get(s)); } }) + .setClusterSettings(clusterSettings) .build(), CacheType.INDICES_REQUEST_CACHE, Map.of( @@ -1024,8 +1037,9 @@ public CachedQueryResult.PolicyValues apply(String s) { public void testMinimumThresholdSettingValue() throws Exception { // Confirm we can't set TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD to below // TimeValue.ZERO (for example, MINUS_ONE) - Setting concreteSetting = TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD - .getConcreteSettingForNamespace(CacheType.INDICES_REQUEST_CACHE.getSettingPrefix()); + Setting concreteSetting = TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get( + CacheType.INDICES_REQUEST_CACHE + ); TimeValue validDuration = new TimeValue(0, TimeUnit.MILLISECONDS); Settings validSettings = Settings.builder().put(concreteSetting.getKey(), validDuration).build(); diff --git a/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java b/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java index 4c9881e845d42..e537ece759e65 100644 --- a/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java +++ b/server/src/main/java/org/opensearch/common/cache/store/config/CacheConfig.java @@ -12,6 +12,7 @@ import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.cache.serializer.Serializer; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -61,6 +62,8 @@ public class CacheConfig { */ private final TimeValue expireAfterAccess; + private final ClusterSettings clusterSettings; + private CacheConfig(Builder builder) { this.keyType = builder.keyType; this.valueType = builder.valueType; @@ -72,6 +75,7 @@ private CacheConfig(Builder builder) { this.cachedResultParser = builder.cachedResultParser; this.maxSizeInBytes = builder.maxSizeInBytes; this.expireAfterAccess = builder.expireAfterAccess; + this.clusterSettings = builder.clusterSettings; } public Class getKeyType() { @@ -114,6 +118,10 @@ public TimeValue getExpireAfterAccess() { return expireAfterAccess; } + public ClusterSettings getClusterSettings() { + return clusterSettings; + } + /** * Builder class to build Cache config related parameters. * @param Type of key. @@ -138,6 +146,7 @@ public static class Builder { private long maxSizeInBytes; private TimeValue expireAfterAccess; + private ClusterSettings clusterSettings; public Builder() {} @@ -191,6 +200,11 @@ public Builder setExpireAfterAccess(TimeValue expireAfterAccess) { return this; } + public Builder setClusterSettings(ClusterSettings clusterSettings) { + this.clusterSettings = clusterSettings; + return this; + } + public CacheConfig build() { return new CacheConfig<>(this); } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 9d763c970c3e7..004973e50d43a 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -83,6 +83,7 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.settings.CacheSettings; +import org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings; import org.opensearch.common.logging.Loggers; import org.opensearch.common.network.NetworkModule; import org.opensearch.common.network.NetworkService; @@ -750,6 +751,14 @@ public void apply(Settings value, Settings current, Settings previous) { TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING ), List.of(FeatureFlags.PLUGGABLE_CACHE), - List.of(CacheSettings.getConcreteStoreNameSettingForCacheType(CacheType.INDICES_REQUEST_CACHE)) + List.of( + CacheSettings.getConcreteStoreNameSettingForCacheType(CacheType.INDICES_REQUEST_CACHE), + OpenSearchOnHeapCacheSettings.MAXIMUM_SIZE_IN_BYTES.getConcreteSettingForNamespace( + CacheType.INDICES_REQUEST_CACHE.getSettingPrefix() + ), + OpenSearchOnHeapCacheSettings.EXPIRE_AFTER_ACCESS_SETTING.getConcreteSettingForNamespace( + CacheType.INDICES_REQUEST_CACHE.getSettingPrefix() + ) + ) ); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java index 34c8d6cf5e840..607ff721bd357 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java +++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java @@ -39,6 +39,7 @@ import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; import org.opensearch.OpenSearchParseException; +import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedSupplier; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; @@ -149,7 +150,8 @@ public final class IndicesRequestCache implements RemovalListener> cacheEntityFunction, CacheService cacheService, - ThreadPool threadPool + ThreadPool threadPool, + ClusterService clusterService ) { this.size = INDICES_CACHE_QUERY_SIZE.get(settings); this.expire = INDICES_CACHE_QUERY_EXPIRE.exists(settings) ? INDICES_CACHE_QUERY_EXPIRE.get(settings) : null; @@ -179,6 +181,7 @@ public final class IndicesRequestCache implements RemovalListener Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool + threadPool, + ClusterServiceUtils.createClusterService(threadPool) ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -160,7 +162,8 @@ public void testBasicOperationsCacheWithFeatureFlag() throws Exception { Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.PLUGGABLE_CACHE, "true").build(), (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), cacheService, - threadPool + threadPool, + ClusterServiceUtils.createClusterService(threadPool) ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -228,7 +231,11 @@ public void testCacheDifferentReaders() throws Exception { return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -360,7 +367,11 @@ public void testCacheCleanupBasedOnZeroThreshold() throws Exception { return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), settings).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), settings).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -421,7 +432,11 @@ public void testCacheCleanupBasedOnStaleThreshold_StalenessEqualToThreshold() th return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), settings).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), settings).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -483,7 +498,11 @@ public void testStaleCount_OnRemovalNotificationOfStaleKey_DecrementsStaleCount( return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), settings).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), settings).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -556,7 +575,11 @@ public void testStaleCount_OnRemovalNotificationOfStaleKey_DoesNotDecrementsStal return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), settings).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), settings).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -629,7 +652,11 @@ public void testCacheCleanupBasedOnStaleThreshold_StalenessGreaterThanThreshold( return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), settings).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), settings).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -691,7 +718,11 @@ public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -749,7 +780,8 @@ public void testEviction() throws Exception { Settings.EMPTY, (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool + threadPool, + ClusterServiceUtils.createClusterService(threadPool) ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -780,7 +812,8 @@ public void testEviction() throws Exception { Settings.builder().put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), size.getBytes() + 1 + "b").build(), (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool + threadPool, + ClusterServiceUtils.createClusterService(threadPool) ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -827,7 +860,11 @@ public void testClearAllEntityIdentity() throws Exception { return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); @@ -923,7 +960,11 @@ public void testInvalidate() throws Exception { return Optional.empty(); } return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), threadPool); + }), + new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), + threadPool, + ClusterServiceUtils.createClusterService(threadPool) + ); Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); From 02e3c56a5d8279e1339a9f4dc2e1f7754d3ad699 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Thu, 11 Apr 2024 16:57:07 -0700 Subject: [PATCH 4/6] Replace the right version to fix backward compatibility introduced with DerivedFields feature (#13168) Signed-off-by: Rishabh Maurya --- .../org/opensearch/search/builder/SearchSourceBuilder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index 77d5567b924eb..182350c22f697 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -289,7 +289,7 @@ public SearchSourceBuilder(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_13_0)) { includeNamedQueriesScore = in.readOptionalBoolean(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_14_0)) { if (in.readBoolean()) { derivedFieldsObject = in.readMap(); } @@ -365,7 +365,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_13_0)) { out.writeOptionalBoolean(includeNamedQueriesScore); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_14_0)) { boolean hasDerivedFieldsObject = derivedFieldsObject != null; out.writeBoolean(hasDerivedFieldsObject); if (hasDerivedFieldsObject) { From 73453718a1c85565a3f7e4309d6fa83bf9a30522 Mon Sep 17 00:00:00 2001 From: Sandeep Kumawat <2025sandeepkumawat@gmail.com> Date: Fri, 12 Apr 2024 11:51:14 +0530 Subject: [PATCH 5/6] =?UTF-8?q?Implement=20interface=20changes=20for=20s3?= =?UTF-8?q?=20plugin=20to=20read/write=20blob=20with=20obj=E2=80=A6=20(#13?= =?UTF-8?q?113)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implement interface changes for s3 plugin to read/write blob with object metadata --------- Signed-off-by: Sandeep Kumawat Signed-off-by: Sandeep Kumawat <2025sandeepkumawat@gmail.com> Co-authored-by: Sandeep Kumawat --- .../repositories/s3/S3BlobContainer.java | 60 ++++++++++++++++--- .../s3/S3RetryingInputStream.java | 7 +++ .../s3/async/AsyncTransferManager.java | 9 +++ .../repositories/s3/async/UploadRequest.java | 16 ++++- .../s3/S3BlobContainerMockClientTests.java | 11 +++- .../s3/S3BlobContainerRetriesTests.java | 59 ++++++++++++++++-- .../s3/S3BlobStoreContainerTests.java | 32 +++++++--- .../s3/S3RetryingInputStreamTests.java | 11 ++++ .../s3/async/AsyncTransferManagerTests.java | 25 ++++++-- .../common/blobstore/BlobContainer.java | 13 ++-- ...loadResponse.java => FetchBlobResult.java} | 7 ++- .../blobstore/stream/write/WriteContext.java | 2 +- .../transfer/BlobStoreTransferService.java | 4 +- .../translog/transfer/TransferService.java | 6 +- 14 files changed, 220 insertions(+), 42 deletions(-) rename server/src/main/java/org/opensearch/common/blobstore/{BlobDownloadResponse.java => FetchBlobResult.java} (82%) diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 1f23a09a047f2..14829a066ca3a 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -62,6 +62,7 @@ import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable; +import software.amazon.awssdk.utils.CollectionUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -77,6 +78,7 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStoreException; import org.opensearch.common.blobstore.DeleteResult; +import org.opensearch.common.blobstore.FetchBlobResult; import org.opensearch.common.blobstore.stream.read.ReadContext; import org.opensearch.common.blobstore.stream.write.WriteContext; import org.opensearch.common.blobstore.stream.write.WritePriority; @@ -138,6 +140,13 @@ public boolean blobExists(String blobName) { } } + @ExperimentalApi + @Override + public FetchBlobResult readBlobWithMetadata(String blobName) throws IOException { + S3RetryingInputStream s3RetryingInputStream = new S3RetryingInputStream(blobStore, buildKey(blobName)); + return new FetchBlobResult(s3RetryingInputStream, s3RetryingInputStream.getMetadata()); + } + @Override public InputStream readBlob(String blobName) throws IOException { return new S3RetryingInputStream(blobStore, buildKey(blobName)); @@ -169,12 +178,27 @@ public long readBlobPreferredLength() { */ @Override public void writeBlob(String blobName, InputStream inputStream, long blobSize, boolean failIfAlreadyExists) throws IOException { + writeBlobWithMetadata(blobName, inputStream, blobSize, failIfAlreadyExists, null); + } + + /** + * Write blob with its object metadata. + */ + @ExperimentalApi + @Override + public void writeBlobWithMetadata( + String blobName, + InputStream inputStream, + long blobSize, + boolean failIfAlreadyExists, + @Nullable Map metadata + ) throws IOException { assert inputStream.markSupported() : "No mark support on inputStream breaks the S3 SDK's ability to retry requests"; SocketAccess.doPrivilegedIOException(() -> { if (blobSize <= getLargeBlobThresholdInBytes()) { - executeSingleUpload(blobStore, buildKey(blobName), inputStream, blobSize); + executeSingleUpload(blobStore, buildKey(blobName), inputStream, blobSize, metadata); } else { - executeMultipartUpload(blobStore, buildKey(blobName), inputStream, blobSize); + executeMultipartUpload(blobStore, buildKey(blobName), inputStream, blobSize, metadata); } return null; }); @@ -190,7 +214,8 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp writeContext.getUploadFinalizer(), writeContext.doRemoteDataIntegrityCheck(), writeContext.getExpectedChecksum(), - blobStore.isUploadRetryEnabled() + blobStore.isUploadRetryEnabled(), + writeContext.getMetadata() ); try { if (uploadRequest.getContentLength() > ByteSizeUnit.GB.toBytes(10) && blobStore.isRedirectLargeUploads()) { @@ -203,7 +228,8 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp blobStore, uploadRequest.getKey(), inputStream.getInputStream(), - uploadRequest.getContentLength() + uploadRequest.getContentLength(), + uploadRequest.getMetadata() ); completionListener.onResponse(null); } catch (Exception ex) { @@ -542,8 +568,13 @@ private String buildKey(String blobName) { /** * Uploads a blob using a single upload request */ - void executeSingleUpload(final S3BlobStore blobStore, final String blobName, final InputStream input, final long blobSize) - throws IOException { + void executeSingleUpload( + final S3BlobStore blobStore, + final String blobName, + final InputStream input, + final long blobSize, + final Map metadata + ) throws IOException { // Extra safety checks if (blobSize > MAX_FILE_SIZE.getBytes()) { @@ -560,6 +591,10 @@ void executeSingleUpload(final S3BlobStore blobStore, final String blobName, fin .storageClass(blobStore.getStorageClass()) .acl(blobStore.getCannedACL()) .overrideConfiguration(o -> o.addMetricPublisher(blobStore.getStatsMetricPublisher().putObjectMetricPublisher)); + + if (CollectionUtils.isNotEmpty(metadata)) { + putObjectRequestBuilder = putObjectRequestBuilder.metadata(metadata); + } if (blobStore.serverSideEncryption()) { putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AES256); } @@ -583,8 +618,13 @@ void executeSingleUpload(final S3BlobStore blobStore, final String blobName, fin /** * Uploads a blob using multipart upload requests. */ - void executeMultipartUpload(final S3BlobStore blobStore, final String blobName, final InputStream input, final long blobSize) - throws IOException { + void executeMultipartUpload( + final S3BlobStore blobStore, + final String blobName, + final InputStream input, + final long blobSize, + final Map metadata + ) throws IOException { ensureMultiPartUploadSize(blobSize); final long partSize = blobStore.bufferSizeInBytes(); @@ -609,6 +649,10 @@ void executeMultipartUpload(final S3BlobStore blobStore, final String blobName, .acl(blobStore.getCannedACL()) .overrideConfiguration(o -> o.addMetricPublisher(blobStore.getStatsMetricPublisher().multipartUploadMetricCollector)); + if (CollectionUtils.isNotEmpty(metadata)) { + createMultipartUploadRequestBuilder.metadata(metadata); + } + if (blobStore.serverSideEncryption()) { createMultipartUploadRequestBuilder.serverSideEncryption(ServerSideEncryption.AES256); } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RetryingInputStream.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RetryingInputStream.java index d7e47e0ab1bcc..2eb63178b19e0 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RetryingInputStream.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RetryingInputStream.java @@ -48,6 +48,7 @@ import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -77,6 +78,7 @@ class S3RetryingInputStream extends InputStream { private long currentOffset; private boolean closed; private boolean eof; + private Map metadata; S3RetryingInputStream(S3BlobStore blobStore, String blobKey) throws IOException { this(blobStore, blobKey, 0, Long.MAX_VALUE - 1); @@ -122,6 +124,7 @@ private void openStream() throws IOException { getObjectResponseInputStream.response().contentLength() ); this.currentStream = getObjectResponseInputStream; + this.metadata = getObjectResponseInputStream.response().metadata(); this.isStreamAborted.set(false); } catch (final SdkException e) { if (e instanceof S3Exception) { @@ -265,4 +268,8 @@ boolean isEof() { boolean isAborted() { return isStreamAborted.get(); } + + Map getMetadata() { + return this.metadata; + } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index 2259780c95276..80538059d17b8 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -22,6 +22,7 @@ import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.utils.CollectionUtils; import software.amazon.awssdk.utils.CompletableFutureUtils; import org.apache.logging.log4j.LogManager; @@ -131,6 +132,10 @@ private void uploadInParts( .bucket(uploadRequest.getBucket()) .key(uploadRequest.getKey()) .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.multipartUploadMetricCollector)); + + if (CollectionUtils.isNotEmpty(uploadRequest.getMetadata())) { + createMultipartUploadRequestBuilder.metadata(uploadRequest.getMetadata()); + } if (uploadRequest.doRemoteDataIntegrityCheck()) { createMultipartUploadRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); } @@ -327,6 +332,10 @@ private void uploadInOneChunk( .key(uploadRequest.getKey()) .contentLength(uploadRequest.getContentLength()) .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.putObjectMetricPublisher)); + + if (CollectionUtils.isNotEmpty(uploadRequest.getMetadata())) { + putObjectRequestBuilder.metadata(uploadRequest.getMetadata()); + } if (uploadRequest.doRemoteDataIntegrityCheck()) { putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java index a5304dc4a97d6..b944a72225d36 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/UploadRequest.java @@ -9,9 +9,11 @@ package org.opensearch.repositories.s3.async; import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.stream.write.WritePriority; import java.io.IOException; +import java.util.Map; /** * A model encapsulating all details for an upload to S3 @@ -24,8 +26,8 @@ public class UploadRequest { private final CheckedConsumer uploadFinalizer; private final boolean doRemoteDataIntegrityCheck; private final Long expectedChecksum; - private boolean uploadRetryEnabled; + private final Map metadata; /** * Construct a new UploadRequest object @@ -37,6 +39,7 @@ public class UploadRequest { * @param uploadFinalizer An upload finalizer to call once all parts are uploaded * @param doRemoteDataIntegrityCheck A boolean to inform vendor plugins whether remote data integrity checks need to be done * @param expectedChecksum Checksum of the file being uploaded for remote data integrity check + * @param metadata Metadata of the file being uploaded */ public UploadRequest( String bucket, @@ -46,7 +49,8 @@ public UploadRequest( CheckedConsumer uploadFinalizer, boolean doRemoteDataIntegrityCheck, Long expectedChecksum, - boolean uploadRetryEnabled + boolean uploadRetryEnabled, + @Nullable Map metadata ) { this.bucket = bucket; this.key = key; @@ -56,6 +60,7 @@ public UploadRequest( this.doRemoteDataIntegrityCheck = doRemoteDataIntegrityCheck; this.expectedChecksum = expectedChecksum; this.uploadRetryEnabled = uploadRetryEnabled; + this.metadata = metadata; } public String getBucket() { @@ -89,4 +94,11 @@ public Long getExpectedChecksum() { public boolean isUploadRetryEnabled() { return uploadRetryEnabled; } + + /** + * @return metadata of the blob to be uploaded + */ + public Map getMetadata() { + return metadata; + } } diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java index 9e830c409a58b..4173f8b66387f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java @@ -57,6 +57,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.concurrent.CompletableFuture; @@ -75,6 +76,7 @@ import static org.opensearch.repositories.s3.S3Repository.BULK_DELETE_SIZE; import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; @@ -659,6 +661,7 @@ private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException) t .writePriority(WritePriority.HIGH) .uploadFinalizer(Assert::assertTrue) .doRemoteDataIntegrityCheck(false) + .metadata(new HashMap<>()) .build(); s3BlobContainer.asyncBlobUpload(writeContext, completionListener); @@ -668,7 +671,13 @@ private void testLargeFilesRedirectedToSlowSyncClient(boolean expectException) t } else { assertNull(exceptionRef.get()); } - verify(s3BlobContainer, times(1)).executeMultipartUpload(any(S3BlobStore.class), anyString(), any(InputStream.class), anyLong()); + verify(s3BlobContainer, times(1)).executeMultipartUpload( + any(S3BlobStore.class), + anyString(), + any(InputStream.class), + anyLong(), + anyMap() + ); if (expectException) { verify(client, times(1)).abortMultipartUpload(any(AbortMultipartUploadRequest.class)); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java index 8e25ba4d950ef..10578090da75c 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java @@ -79,8 +79,10 @@ import java.net.SocketTimeoutException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; @@ -240,7 +242,7 @@ public InputStream readBlob(String blobName, long position, long length) throws }; } - public void testWriteBlobWithRetries() throws Exception { + public void writeBlobWithRetriesHelper(Map metadata) throws Exception { final int maxRetries = randomInt(5); final CountDown countDown = new CountDown(maxRetries + 1); @@ -280,11 +282,26 @@ public void testWriteBlobWithRetries() throws Exception { final BlobContainer blobContainer = createBlobContainer(maxRetries, null, true, null); try (InputStream stream = new ByteArrayInputStream(bytes)) { - blobContainer.writeBlob("write_blob_max_retries", stream, bytes.length, false); + if (metadata != null) { + blobContainer.writeBlobWithMetadata("write_blob_max_retries", stream, bytes.length, false, metadata); + } else { + blobContainer.writeBlob("write_blob_max_retries", stream, bytes.length, false); + } } assertThat(countDown.isCountedDown(), is(true)); } + public void testWriteBlobWithMetadataWithRetries() throws Exception { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + writeBlobWithRetriesHelper(metadata); + } + + public void testWriteBlobWithRetries() throws Exception { + writeBlobWithRetriesHelper(null); + } + public void testWriteBlobByStreamsWithRetries() throws Exception { final int maxRetries = randomInt(5); final CountDown countDown = new CountDown(maxRetries + 1); @@ -368,7 +385,7 @@ private int calculateNumberOfParts(long contentLength, long partSize) { return (int) ((contentLength % partSize) == 0 ? contentLength / partSize : (contentLength / partSize) + 1); } - public void testWriteBlobWithReadTimeouts() { + public void writeBlobWithReadTimeoutsHelper(Map metadata) { final byte[] bytes = randomByteArrayOfLength(randomIntBetween(10, 128)); final TimeValue readTimeout = TimeValue.timeValueMillis(randomIntBetween(100, 500)); final BlobContainer blobContainer = createBlobContainer(1, readTimeout, true, null); @@ -386,7 +403,11 @@ public void testWriteBlobWithReadTimeouts() { Exception exception = expectThrows(IOException.class, () -> { try (InputStream stream = new InputStreamIndexInput(new ByteArrayIndexInput("desc", bytes), bytes.length)) { - blobContainer.writeBlob("write_blob_timeout", stream, bytes.length, false); + if (metadata != null) { + blobContainer.writeBlobWithMetadata("write_blob_timeout", stream, bytes.length, false, metadata); + } else { + blobContainer.writeBlob("write_blob_timeout", stream, bytes.length, false); + } } }); assertThat( @@ -401,7 +422,18 @@ public void testWriteBlobWithReadTimeouts() { assertThat(exception.getCause().getCause().getMessage().toLowerCase(Locale.ROOT), containsString("read timed out")); } - public void testWriteLargeBlob() throws Exception { + public void testWriteBlobWithMetadataWithReadTimeouts() throws Exception { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + writeBlobWithReadTimeoutsHelper(metadata); + } + + public void testWriteBlobWithReadTimeouts() throws Exception { + writeBlobWithReadTimeoutsHelper(null); + } + + public void WriteLargeBlobHelper(Map metadata) throws Exception { final boolean useTimeout = rarely(); final TimeValue readTimeout = useTimeout ? TimeValue.timeValueMillis(randomIntBetween(100, 500)) : null; final ByteSizeValue bufferSize = new ByteSizeValue(5, ByteSizeUnit.MB); @@ -487,13 +519,28 @@ public void testWriteLargeBlob() throws Exception { } }); - blobContainer.writeBlob("write_large_blob", new ZeroInputStream(blobSize), blobSize, false); + if (metadata != null) { + blobContainer.writeBlobWithMetadata("write_large_blob", new ZeroInputStream(blobSize), blobSize, false, metadata); + } else { + blobContainer.writeBlob("write_large_blob", new ZeroInputStream(blobSize), blobSize, false); + } assertThat(countDownInitiate.isCountedDown(), is(true)); assertThat(countDownUploads.get(), equalTo(0)); assertThat(countDownComplete.isCountedDown(), is(true)); } + public void testWriteLargeBlobWithMetadata() throws Exception { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + WriteLargeBlobHelper(metadata); + } + + public void testWriteLargeBlob() throws Exception { + WriteLargeBlobHelper(null); + } + /** * Asserts that an InputStream is fully consumed, or aborted, when it is closed */ diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java index 2b45e9cfe2d4b..654d8a72690c4 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java @@ -90,6 +90,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -125,7 +126,7 @@ public void testExecuteSingleUploadBlobSizeTooLarge() { final IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> blobContainer.executeSingleUpload(blobStore, randomAlphaOfLengthBetween(1, 10), null, blobSize) + () -> blobContainer.executeSingleUpload(blobStore, randomAlphaOfLengthBetween(1, 10), null, blobSize, null) ); assertEquals("Upload request size [" + blobSize + "] can't be larger than 5gb", e.getMessage()); } @@ -139,7 +140,13 @@ public void testExecuteSingleUploadBlobSizeLargerThanBufferSize() { final IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> blobContainer.executeSingleUpload(blobStore, blobName, new ByteArrayInputStream(new byte[0]), ByteSizeUnit.MB.toBytes(2)) + () -> blobContainer.executeSingleUpload( + blobStore, + blobName, + new ByteArrayInputStream(new byte[0]), + ByteSizeUnit.MB.toBytes(2), + null + ) ); assertEquals("Upload request size [2097152] can't be larger than buffer size", e.getMessage()); } @@ -430,6 +437,10 @@ public void testExecuteSingleUpload() throws IOException { final String bucketName = randomAlphaOfLengthBetween(1, 10); final String blobName = randomAlphaOfLengthBetween(1, 10); + final Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + final BlobPath blobPath = new BlobPath(); if (randomBoolean()) { IntStream.of(randomIntBetween(1, 5)).forEach(value -> blobPath.add("path_" + value)); @@ -467,7 +478,7 @@ public void testExecuteSingleUpload() throws IOException { ); final ByteArrayInputStream inputStream = new ByteArrayInputStream(new byte[blobSize]); - blobContainer.executeSingleUpload(blobStore, blobName, inputStream, blobSize); + blobContainer.executeSingleUpload(blobStore, blobName, inputStream, blobSize, metadata); final PutObjectRequest request = putObjectRequestArgumentCaptor.getValue(); final RequestBody requestBody = requestBodyArgumentCaptor.getValue(); @@ -480,6 +491,7 @@ public void testExecuteSingleUpload() throws IOException { assertEquals(blobSize, request.contentLength().longValue()); assertEquals(storageClass, request.storageClass()); assertEquals(cannedAccessControlList, request.acl()); + assertEquals(metadata, request.metadata()); if (serverSideEncryption) { assertEquals(ServerSideEncryption.AES256, request.serverSideEncryption()); } @@ -492,7 +504,7 @@ public void testExecuteMultipartUploadBlobSizeTooLarge() { final IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> blobContainer.executeMultipartUpload(blobStore, randomAlphaOfLengthBetween(1, 10), null, blobSize) + () -> blobContainer.executeMultipartUpload(blobStore, randomAlphaOfLengthBetween(1, 10), null, blobSize, null) ); assertEquals("Multipart upload request size [" + blobSize + "] can't be larger than 5tb", e.getMessage()); } @@ -504,7 +516,7 @@ public void testExecuteMultipartUploadBlobSizeTooSmall() { final IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> blobContainer.executeMultipartUpload(blobStore, randomAlphaOfLengthBetween(1, 10), null, blobSize) + () -> blobContainer.executeMultipartUpload(blobStore, randomAlphaOfLengthBetween(1, 10), null, blobSize, null) ); assertEquals("Multipart upload request size [" + blobSize + "] can't be smaller than 5mb", e.getMessage()); } @@ -513,6 +525,10 @@ public void testExecuteMultipartUpload() throws IOException { final String bucketName = randomAlphaOfLengthBetween(1, 10); final String blobName = randomAlphaOfLengthBetween(1, 10); + final Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + final BlobPath blobPath = new BlobPath(); if (randomBoolean()) { IntStream.of(randomIntBetween(1, 5)).forEach(value -> blobPath.add("path_" + value)); @@ -577,13 +593,15 @@ public void testExecuteMultipartUpload() throws IOException { final ByteArrayInputStream inputStream = new ByteArrayInputStream(new byte[0]); final S3BlobContainer blobContainer = new S3BlobContainer(blobPath, blobStore); - blobContainer.executeMultipartUpload(blobStore, blobName, inputStream, blobSize); + blobContainer.executeMultipartUpload(blobStore, blobName, inputStream, blobSize, metadata); final CreateMultipartUploadRequest initRequest = createMultipartUploadRequestArgumentCaptor.getValue(); assertEquals(bucketName, initRequest.bucket()); assertEquals(blobPath.buildAsString() + blobName, initRequest.key()); assertEquals(storageClass, initRequest.storageClass()); assertEquals(cannedAccessControlList, initRequest.acl()); + assertEquals(metadata, initRequest.metadata()); + if (serverSideEncryption) { assertEquals(ServerSideEncryption.AES256, initRequest.serverSideEncryption()); } @@ -686,7 +704,7 @@ public void testExecuteMultipartUploadAborted() { final IOException e = expectThrows(IOException.class, () -> { final S3BlobContainer blobContainer = new S3BlobContainer(blobPath, blobStore); - blobContainer.executeMultipartUpload(blobStore, blobName, new ByteArrayInputStream(new byte[0]), blobSize); + blobContainer.executeMultipartUpload(blobStore, blobName, new ByteArrayInputStream(new byte[0]), blobSize, null); }); assertEquals("Unable to upload object [" + blobName + "] using multipart upload", e.getMessage()); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RetryingInputStreamTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RetryingInputStreamTests.java index b38d5119b4108..d884a46f3ecc5 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RetryingInputStreamTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RetryingInputStreamTests.java @@ -43,6 +43,8 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import static org.hamcrest.Matchers.is; import static org.mockito.Mockito.any; @@ -61,6 +63,15 @@ public void testInputStreamFullyConsumed() throws IOException { assertThat(stream.isAborted(), is(false)); } + public void testInputStreamGetMetadata() throws IOException { + final byte[] expectedBytes = randomByteArrayOfLength(randomIntBetween(1, 512)); + + final S3RetryingInputStream stream = createInputStream(expectedBytes, 0L, (long) (Integer.MAX_VALUE - 1)); + + Map metadata = new HashMap<>(); + assertEquals(stream.getMetadata(), metadata); + } + public void testInputStreamIsAborted() throws IOException { final byte[] expectedBytes = randomByteArrayOfLength(randomIntBetween(10, 512)); final byte[] actualBytes = new byte[randomIntBetween(1, Math.max(1, expectedBytes.length - 1))]; diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index b753b847df869..04d1819bef02b 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -40,7 +40,9 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; @@ -78,11 +80,14 @@ public void testOneChunkUpload() { ); AtomicReference streamRef = new AtomicReference<>(); + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(1), WritePriority.HIGH, uploadSuccess -> { // do nothing - }, false, null, true), + }, false, null, true, metadata), new StreamContext((partIdx, partSize, position) -> { streamRef.set(new ZeroInputStream(partSize)); return new InputStreamContainer(streamRef.get(), partSize, position); @@ -123,11 +128,15 @@ public void testOneChunkUploadCorruption() { deleteObjectResponseCompletableFuture.complete(DeleteObjectResponse.builder().build()); when(s3AsyncClient.deleteObject(any(DeleteObjectRequest.class))).thenReturn(deleteObjectResponseCompletableFuture); + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(1), WritePriority.HIGH, uploadSuccess -> { // do nothing - }, false, null, true), + }, false, null, true, metadata), new StreamContext( (partIdx, partSize, position) -> new InputStreamContainer(new ZeroInputStream(partSize), partSize, position), ByteSizeUnit.MB.toBytes(1), @@ -175,12 +184,16 @@ public void testMultipartUpload() { abortMultipartUploadResponseCompletableFuture ); + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + List streams = new ArrayList<>(); CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(5), WritePriority.HIGH, uploadSuccess -> { // do nothing - }, true, 3376132981L, true), + }, true, 3376132981L, true, metadata), new StreamContext((partIdx, partSize, position) -> { InputStream stream = new ZeroInputStream(partSize); streams.add(stream); @@ -236,11 +249,15 @@ public void testMultipartUploadCorruption() { abortMultipartUploadResponseCompletableFuture ); + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(5), WritePriority.HIGH, uploadSuccess -> { // do nothing - }, true, 0L, true), + }, true, 0L, true, metadata), new StreamContext( (partIdx, partSize, position) -> new InputStreamContainer(new ZeroInputStream(partSize), partSize, position), ByteSizeUnit.MB.toBytes(1), diff --git a/server/src/main/java/org/opensearch/common/blobstore/BlobContainer.java b/server/src/main/java/org/opensearch/common/blobstore/BlobContainer.java index e16e75de7f27d..4f5f8d4b1ef5f 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/BlobContainer.java +++ b/server/src/main/java/org/opensearch/common/blobstore/BlobContainer.java @@ -32,6 +32,7 @@ package org.opensearch.common.blobstore; +import org.opensearch.common.Nullable; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.core.action.ActionListener; @@ -79,16 +80,16 @@ public interface BlobContainer { InputStream readBlob(String blobName) throws IOException; /** - * Creates a new {@link BlobDownloadResponse} for the given blob name. + * Creates a new {@link FetchBlobResult} for the given blob name. * * @param blobName * The name of the blob to get an {@link InputStream} for. - * @return The {@link BlobDownloadResponse} of the blob. + * @return The {@link FetchBlobResult} of the blob. * @throws NoSuchFileException if the blob does not exist * @throws IOException if the blob can not be read. */ @ExperimentalApi - default BlobDownloadResponse readBlobWithMetadata(String blobName) throws IOException { + default FetchBlobResult readBlobWithMetadata(String blobName) throws IOException { throw new UnsupportedOperationException("readBlobWithMetadata is not implemented yet"); }; @@ -166,9 +167,9 @@ default long readBlobPreferredLength() { default void writeBlobWithMetadata( String blobName, InputStream inputStream, - Map metadata, long blobSize, - boolean failIfAlreadyExists + boolean failIfAlreadyExists, + @Nullable Map metadata ) throws IOException { throw new UnsupportedOperationException("writeBlobWithMetadata is not implemented yet"); }; @@ -219,7 +220,7 @@ default void writeBlobWithMetadata( default void writeBlobAtomicWithMetadata( String blobName, InputStream inputStream, - Map metadata, + @Nullable Map metadata, long blobSize, boolean failIfAlreadyExists ) throws IOException { diff --git a/server/src/main/java/org/opensearch/common/blobstore/BlobDownloadResponse.java b/server/src/main/java/org/opensearch/common/blobstore/FetchBlobResult.java similarity index 82% rename from server/src/main/java/org/opensearch/common/blobstore/BlobDownloadResponse.java rename to server/src/main/java/org/opensearch/common/blobstore/FetchBlobResult.java index 97f3e4a16a76c..55aca771b586c 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/BlobDownloadResponse.java +++ b/server/src/main/java/org/opensearch/common/blobstore/FetchBlobResult.java @@ -8,6 +8,8 @@ package org.opensearch.common.blobstore; +import org.opensearch.common.annotation.ExperimentalApi; + import java.io.InputStream; import java.util.Map; @@ -17,7 +19,8 @@ * * @opensearch.experimental */ -public class BlobDownloadResponse { +@ExperimentalApi +public class FetchBlobResult { /** * Downloaded blob InputStream @@ -37,7 +40,7 @@ public Map getMetadata() { return metadata; } - public BlobDownloadResponse(InputStream inputStream, Map metadata) { + public FetchBlobResult(InputStream inputStream, Map metadata) { this.inputStream = inputStream; this.metadata = metadata; } diff --git a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WriteContext.java b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WriteContext.java index d14800e82e495..9a7fc24f7e484 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WriteContext.java +++ b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WriteContext.java @@ -52,7 +52,7 @@ private WriteContext( CheckedConsumer uploadFinalizer, boolean doRemoteDataIntegrityCheck, @Nullable Long expectedChecksum, - Map metadata + @Nullable Map metadata ) { this.fileName = fileName; this.streamContextSupplier = streamContextSupplier; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java index a1d5041ff9aff..bec2d78d9af62 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java @@ -15,10 +15,10 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; import org.opensearch.common.blobstore.BlobContainer; -import org.opensearch.common.blobstore.BlobDownloadResponse; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.FetchBlobResult; import org.opensearch.common.blobstore.stream.write.WriteContext; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; @@ -168,7 +168,7 @@ public InputStream downloadBlob(Iterable path, String fileName) throws I @Override @ExperimentalApi - public BlobDownloadResponse downloadBlobWithMetadata(Iterable path, String fileName) throws IOException { + public FetchBlobResult downloadBlobWithMetadata(Iterable path, String fileName) throws IOException { return blobStore.blobContainer((BlobPath) path).readBlobWithMetadata(fileName); } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java index 5fe8b02058383..0894ebf500ebd 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java @@ -9,9 +9,9 @@ package org.opensearch.index.translog.transfer; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.common.blobstore.BlobDownloadResponse; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.FetchBlobResult; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.core.action.ActionListener; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; @@ -131,11 +131,11 @@ void uploadBlobs( * * @param path the remote path from where download should be made * @param fileName the name of the file - * @return {@link BlobDownloadResponse} of the remote file + * @return {@link FetchBlobResult} of the remote file * @throws IOException the exception while reading the data */ @ExperimentalApi - BlobDownloadResponse downloadBlobWithMetadata(Iterable path, String fileName) throws IOException; + FetchBlobResult downloadBlobWithMetadata(Iterable path, String fileName) throws IOException; void listAllInSortedOrder(Iterable path, String filenamePrefix, int limit, ActionListener> listener); From c168e1c9216ba914fcd5c310ffb070df8ef5e6b5 Mon Sep 17 00:00:00 2001 From: Lakshya Taragi <157457166+ltaragi@users.noreply.github.com> Date: Fri, 12 Apr 2024 19:46:36 +0530 Subject: [PATCH 6/6] Update index settings during remote store migration (#12748) Signed-off-by: Lakshya Taragi --- .../RemoteStoreMigrationSettingsUpdateIT.java | 187 ++++++++++++++++++ ...eMigrationShardAllocationBaseTestCase.java | 101 ++++++++++ .../ResizeIndexMigrationTestCase.java | 91 +++++---- .../metadata/MetadataCreateIndexService.java | 94 ++++++--- .../remotestore/RemoteStoreNodeService.java | 15 ++ .../opensearch/snapshots/RestoreService.java | 17 +- .../MetadataCreateIndexServiceTests.java | 159 +++++++++++++-- ...eStoreMigrationAllocationDeciderTests.java | 2 +- 8 files changed, 569 insertions(+), 97 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java new file mode 100644 index 0000000000000..5ae2a976f4066 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java @@ -0,0 +1,187 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotemigration; + +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexSettings; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotState; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Optional; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteStoreMigrationSettingsUpdateIT extends RemoteStoreMigrationShardAllocationBaseTestCase { + + private Client client; + + // remote store backed index setting tests + + public void testNewIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() { + logger.info("Initialize cluster: gives non remote cluster manager"); + initializeCluster(false); + + String indexName1 = "test_index_1"; + String indexName2 = "test_index_2"; + + logger.info("Add non-remote node"); + addRemote = false; + String nonRemoteNodeName = internalCluster().startNode(); + internalCluster().validateClusterFormed(); + assertNodeInCluster(nonRemoteNodeName); + + logger.info("Create an index"); + prepareIndexWithoutReplica(Optional.of(indexName1)); + + logger.info("Verify that non remote-backed index is created"); + assertNonRemoteStoreBackedIndex(indexName1); + + logger.info("Set mixed cluster compatibility mode and remote_store direction"); + setClusterMode(MIXED.mode); + setDirection(REMOTE_STORE.direction); + + logger.info("Add remote node"); + addRemote = true; + String remoteNodeName = internalCluster().startNode(); + internalCluster().validateClusterFormed(); + assertNodeInCluster(remoteNodeName); + + logger.info("Create another index"); + prepareIndexWithoutReplica(Optional.of(indexName2)); + + logger.info("Verify that remote backed index is created"); + assertRemoteStoreBackedIndex(indexName2); + } + + public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() throws Exception { + logger.info("Initialize cluster: gives non remote cluster manager"); + initializeCluster(false); + + logger.info("Add remote and non-remote nodes"); + setClusterMode(MIXED.mode); + addRemote = false; + String nonRemoteNodeName = internalCluster().startNode(); + addRemote = true; + String remoteNodeName = internalCluster().startNode(); + internalCluster().validateClusterFormed(); + assertNodeInCluster(nonRemoteNodeName); + assertNodeInCluster(remoteNodeName); + + logger.info("Create a non remote-backed index"); + client.admin() + .indices() + .prepareCreate(TEST_INDEX) + .setSettings( + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ) + .get(); + + logger.info("Verify that non remote stored backed index is created"); + assertNonRemoteStoreBackedIndex(TEST_INDEX); + + logger.info("Create repository"); + String snapshotName = "test-snapshot"; + String snapshotRepoName = "test-restore-snapshot-repo"; + Path snapshotRepoNameAbsolutePath = randomRepoPath().toAbsolutePath(); + assertAcked( + clusterAdmin().preparePutRepository(snapshotRepoName) + .setType("fs") + .setSettings(Settings.builder().put("location", snapshotRepoNameAbsolutePath)) + ); + + logger.info("Create snapshot of non remote stored backed index"); + + SnapshotInfo snapshotInfo = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setIndices(TEST_INDEX) + .setWaitForCompletion(true) + .get() + .getSnapshotInfo(); + + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(0, snapshotInfo.failedShards()); + + logger.info("Restore index from snapshot under NONE direction"); + String restoredIndexName1 = TEST_INDEX + "-restored1"; + restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName1); + + logger.info("Verify that restored index is non remote-backed"); + assertNonRemoteStoreBackedIndex(restoredIndexName1); + + logger.info("Restore index from snapshot under REMOTE_STORE direction"); + setDirection(REMOTE_STORE.direction); + String restoredIndexName2 = TEST_INDEX + "-restored2"; + restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName2); + + logger.info("Verify that restored index is non remote-backed"); + assertRemoteStoreBackedIndex(restoredIndexName2); + } + + // restore indices from a snapshot + private void restoreSnapshot(String snapshotRepoName, String snapshotName, String restoredIndexName) { + RestoreSnapshotResponse restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(false) + .setIndices(TEST_INDEX) + .setRenamePattern(TEST_INDEX) + .setRenameReplacement(restoredIndexName) + .get(); + + assertEquals(restoreSnapshotResponse.status(), RestStatus.ACCEPTED); + ensureGreen(restoredIndexName); + } + + // verify that the created index is not remote store backed + private void assertNonRemoteStoreBackedIndex(String indexName) { + Settings indexSettings = client.admin().indices().prepareGetIndex().execute().actionGet().getSettings().get(indexName); + assertEquals(ReplicationType.DOCUMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); + assertNull(indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); + assertNull(indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); + assertNull(indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); + } + + // verify that the created index is remote store backed + private void assertRemoteStoreBackedIndex(String indexName) { + Settings indexSettings = client.admin().indices().prepareGetIndex().execute().actionGet().getSettings().get(indexName); + assertEquals(ReplicationType.SEGMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); + assertEquals("true", indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); + assertEquals(REPOSITORY_NAME, indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); + assertEquals(REPOSITORY_2_NAME, indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); + assertEquals( + IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL, + INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings) + ); + } + + // bootstrap a cluster + private void initializeCluster(boolean remoteClusterManager) { + addRemote = remoteClusterManager; + internalCluster().startClusterManagerOnlyNode(); + client = internalCluster().client(); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java new file mode 100644 index 0000000000000..ad2302d1ab2e1 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotemigration; + +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.settings.Settings; + +import java.util.Map; +import java.util.Optional; + +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +public class RemoteStoreMigrationShardAllocationBaseTestCase extends MigrationBaseTestCase { + protected static final String TEST_INDEX = "test_index"; + protected static final String NAME = "remote_store_migration"; + + protected final ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + + // set the compatibility mode of cluster [strict, mixed] + protected void setClusterMode(String mode) { + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), mode)); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + } + + // set the migration direction for cluster [remote_store, docrep, none] + public void setDirection(String direction) { + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), direction)); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + } + + // verify that the given nodeName exists in cluster + protected DiscoveryNode assertNodeInCluster(String nodeName) { + Map nodes = internalCluster().client().admin().cluster().prepareState().get().getState().nodes().getNodes(); + DiscoveryNode discoveryNode = null; + for (Map.Entry entry : nodes.entrySet()) { + DiscoveryNode node = entry.getValue(); + if (node.getName().equals(nodeName)) { + discoveryNode = node; + break; + } + } + assertNotNull(discoveryNode); + return discoveryNode; + } + + // returns a comma-separated list of node names excluding `except` + protected String allNodesExcept(String except) { + StringBuilder exclude = new StringBuilder(); + DiscoveryNodes allNodes = internalCluster().client().admin().cluster().prepareState().get().getState().nodes(); + for (DiscoveryNode node : allNodes) { + if (node.getName().equals(except) == false) { + exclude.append(node.getName()).append(","); + } + } + return exclude.toString(); + } + + // create a new test index + protected void prepareIndexWithoutReplica(Optional name) { + String indexName = name.orElse(TEST_INDEX); + internalCluster().client() + .admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + .put("index.routing.allocation.exclude._name", allNodesExcept(null)) + ) + .execute() + .actionGet(); + } + + protected ShardRouting getShardRouting(boolean isPrimary) { + IndexShardRoutingTable table = internalCluster().client() + .admin() + .cluster() + .prepareState() + .execute() + .actionGet() + .getState() + .getRoutingTable() + .index(TEST_INDEX) + .shard(0); + return (isPrimary ? table.primaryShard() : table.replicaShards().get(0)); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java index 0548ce4a7955f..b57bc60c50e8c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/ResizeIndexMigrationTestCase.java @@ -11,25 +11,20 @@ import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.indices.shrink.ResizeType; import org.opensearch.action.support.ActiveShardCount; -import org.opensearch.client.Client; import org.opensearch.common.settings.Settings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.OpenSearchIntegTestCase; -import java.util.List; - import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; -@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false) +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class ResizeIndexMigrationTestCase extends MigrationBaseTestCase { private static final String TEST_INDEX = "test_index"; private final static String REMOTE_STORE_DIRECTION = "remote_store"; private final static String DOC_REP_DIRECTION = "docrep"; - private final static String NONE_DIRECTION = "none"; - private final static String STRICT_MODE = "strict"; private final static String MIXED_MODE = "mixed"; /* @@ -37,38 +32,43 @@ public class ResizeIndexMigrationTestCase extends MigrationBaseTestCase { * and index is on DocRep node, and migration to remote store is in progress. * */ public void testFailResizeIndexWhileDocRepToRemoteStoreMigration() throws Exception { - - internalCluster().setBootstrapClusterManagerNodeIndex(0); - List cmNodes = internalCluster().startNodes(1); - Client client = internalCluster().client(cmNodes.get(0)); - ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); - updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), MIXED_MODE)); - assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); - - // Adding a non remote and a remote node addRemote = false; - String nonRemoteNodeName = internalCluster().startNode(); + // create a docrep cluster + internalCluster().startClusterManagerOnlyNode(); + internalCluster().validateClusterFormed(); - addRemote = true; - String remoteNodeName = internalCluster().startNode(); + // add a non-remote node + String nonRemoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); logger.info("-->Create index on non-remote node and SETTING_REMOTE_STORE_ENABLED is false. Resize should not happen"); Settings.Builder builder = Settings.builder().put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT); - client.admin() + internalCluster().client() + .admin() .indices() .prepareCreate(TEST_INDEX) .setSettings( builder.put("index.number_of_shards", 10) .put("index.number_of_replicas", 0) .put("index.routing.allocation.include._name", nonRemoteNodeName) - .put("index.routing.allocation.exclude._name", remoteNodeName) ) .setWaitForActiveShards(ActiveShardCount.ALL) .execute() .actionGet(); + // set mixed mode + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), MIXED_MODE)); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + // add a remote node + addRemote = true; + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + + // set remote store migration direction updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), REMOTE_STORE_DIRECTION)); - assertAcked(client.admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); ResizeType resizeType; int resizeShardsNum; @@ -90,7 +90,8 @@ public void testFailResizeIndexWhileDocRepToRemoteStoreMigration() throws Except cause = "clone_index"; } - client.admin() + internalCluster().client() + .admin() .indices() .prepareUpdateSettings(TEST_INDEX) .setSettings(Settings.builder().put("index.blocks.write", true)) @@ -106,7 +107,8 @@ public void testFailResizeIndexWhileDocRepToRemoteStoreMigration() throws Except IllegalStateException ex = expectThrows( IllegalStateException.class, - () -> client().admin() + () -> internalCluster().client() + .admin() .indices() .prepareResizeIndex(TEST_INDEX, "first_split") .setResizeType(resizeType) @@ -124,38 +126,43 @@ public void testFailResizeIndexWhileDocRepToRemoteStoreMigration() throws Except * and index is on Remote Store node, and migration to DocRep node is in progress. * */ public void testFailResizeIndexWhileRemoteStoreToDocRepMigration() throws Exception { - + // creates a remote cluster addRemote = true; - internalCluster().setBootstrapClusterManagerNodeIndex(0); - List cmNodes = internalCluster().startNodes(1); - Client client = internalCluster().client(cmNodes.get(0)); - ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); - updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), MIXED_MODE)); - assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().validateClusterFormed(); - // Adding a non remote and a remote node - String remoteNodeName = internalCluster().startNode(); - - addRemote = false; - String nonRemoteNodeName = internalCluster().startNode(); + // add a remote node + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); - logger.info("-->Create index on remote node and SETTING_REMOTE_STORE_ENABLED is true. Resize should not happen"); + logger.info("--> Create index on remote node and SETTING_REMOTE_STORE_ENABLED is true. Resize should not happen"); Settings.Builder builder = Settings.builder().put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT); - client.admin() + internalCluster().client() + .admin() .indices() .prepareCreate(TEST_INDEX) .setSettings( builder.put("index.number_of_shards", 10) .put("index.number_of_replicas", 0) .put("index.routing.allocation.include._name", remoteNodeName) - .put("index.routing.allocation.exclude._name", nonRemoteNodeName) ) .setWaitForActiveShards(ActiveShardCount.ALL) .execute() .actionGet(); + // set mixed mode + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), MIXED_MODE)); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + // add a non-remote node + addRemote = false; + String nonRemoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + + // set docrep migration direction updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), DOC_REP_DIRECTION)); - assertAcked(client.admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); ResizeType resizeType; int resizeShardsNum; @@ -177,7 +184,8 @@ public void testFailResizeIndexWhileRemoteStoreToDocRepMigration() throws Except cause = "clone_index"; } - client.admin() + internalCluster().client() + .admin() .indices() .prepareUpdateSettings(TEST_INDEX) .setSettings(Settings.builder().put("index.blocks.write", true)) @@ -193,7 +201,8 @@ public void testFailResizeIndexWhileRemoteStoreToDocRepMigration() throws Except IllegalStateException ex = expectThrows( IllegalStateException.class, - () -> client().admin() + () -> internalCluster().client() + .admin() .indices() .prepareResizeIndex(TEST_INDEX, "first_split") .setResizeType(resizeType) diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index a180876685f9c..d55ec3362b01f 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -52,6 +52,7 @@ import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.RoutingTable; @@ -100,8 +101,8 @@ import org.opensearch.indices.ShardLimitValidator; import org.opensearch.indices.SystemIndices; import org.opensearch.indices.replication.common.ReplicationType; -import org.opensearch.node.Node; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -144,6 +145,8 @@ import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteDataAttributePresent; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.isMigratingToRemoteStore; /** * Service responsible for submitting create index requests @@ -944,8 +947,8 @@ static Settings aggregateIndexSettings( indexSettingsBuilder.put(IndexMetadata.SETTING_INDEX_PROVIDED_NAME, request.getProvidedName()); indexSettingsBuilder.put(SETTING_INDEX_UUID, UUIDs.randomBase64UUID()); - updateReplicationStrategy(indexSettingsBuilder, request.settings(), settings, combinedTemplateSettings); - updateRemoteStoreSettings(indexSettingsBuilder, settings); + updateReplicationStrategy(indexSettingsBuilder, request.settings(), settings, combinedTemplateSettings, clusterSettings); + updateRemoteStoreSettings(indexSettingsBuilder, currentState, clusterSettings, settings, request.index()); if (sourceMetadata != null) { assert request.resizeType() != null; @@ -996,26 +999,31 @@ static Settings aggregateIndexSettings( public static void updateReplicationStrategy( Settings.Builder settingsBuilder, Settings requestSettings, - Settings clusterSettings, - Settings combinedTemplateSettings + Settings nodeSettings, + Settings combinedTemplateSettings, + ClusterSettings clusterSettings ) { // The replication setting is applied in the following order: - // 1. Explicit index creation request parameter - // 2. Template property for replication type - // 3. Defaults to segment if remote store attributes on the cluster - // 4. Default cluster level setting + // 1. Strictly SEGMENT if cluster is undergoing remote store migration + // 2. Explicit index creation request parameter + // 3. Template property for replication type + // 4. Replication type according to cluster level settings + // 5. Defaults to segment if remote store attributes on the cluster + // 6. Default cluster level setting final ReplicationType indexReplicationType; - if (INDEX_REPLICATION_TYPE_SETTING.exists(requestSettings)) { + if (isMigratingToRemoteStore(clusterSettings)) { + indexReplicationType = ReplicationType.SEGMENT; + } else if (INDEX_REPLICATION_TYPE_SETTING.exists(requestSettings)) { indexReplicationType = INDEX_REPLICATION_TYPE_SETTING.get(requestSettings); } else if (combinedTemplateSettings != null && INDEX_REPLICATION_TYPE_SETTING.exists(combinedTemplateSettings)) { indexReplicationType = INDEX_REPLICATION_TYPE_SETTING.get(combinedTemplateSettings); - } else if (CLUSTER_REPLICATION_TYPE_SETTING.exists(clusterSettings)) { - indexReplicationType = CLUSTER_REPLICATION_TYPE_SETTING.get(clusterSettings); - } else if (isRemoteDataAttributePresent(clusterSettings)) { + } else if (CLUSTER_REPLICATION_TYPE_SETTING.exists(nodeSettings)) { + indexReplicationType = CLUSTER_REPLICATION_TYPE_SETTING.get(nodeSettings); + } else if (isRemoteDataAttributePresent(nodeSettings)) { indexReplicationType = ReplicationType.SEGMENT; } else { - indexReplicationType = CLUSTER_REPLICATION_TYPE_SETTING.getDefault(clusterSettings); + indexReplicationType = CLUSTER_REPLICATION_TYPE_SETTING.getDefault(nodeSettings); } settingsBuilder.put(SETTING_REPLICATION_TYPE, indexReplicationType); } @@ -1023,23 +1031,49 @@ public static void updateReplicationStrategy( /** * Updates index settings to enable remote store by default based on node attributes * @param settingsBuilder index settings builder to be updated with relevant settings - * @param nodeSettings node settings + * @param clusterState state of cluster + * @param clusterSettings cluster level settings + * @param nodeSettings node level settings + * @param indexName name of index */ - public static void updateRemoteStoreSettings(Settings.Builder settingsBuilder, Settings nodeSettings) { - if (RemoteStoreNodeAttribute.isRemoteStoreAttributePresent(nodeSettings)) { - settingsBuilder.put(SETTING_REMOTE_STORE_ENABLED, true) - .put( - SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, - nodeSettings.get( - Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY - ) - ) - .put( - SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, - nodeSettings.get( - Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY - ) - ); + public static void updateRemoteStoreSettings( + Settings.Builder settingsBuilder, + ClusterState clusterState, + ClusterSettings clusterSettings, + Settings nodeSettings, + String indexName + ) { + if ((isRemoteDataAttributePresent(nodeSettings) + && clusterSettings.get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING).equals(RemoteStoreNodeService.CompatibilityMode.STRICT)) + || isMigratingToRemoteStore(clusterSettings)) { + String segmentRepo, translogRepo; + + Optional remoteNode = clusterState.nodes() + .getNodes() + .values() + .stream() + .filter(DiscoveryNode::isRemoteStoreNode) + .findFirst(); + + if (remoteNode.isPresent()) { + translogRepo = remoteNode.get() + .getAttributes() + .get(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY); + segmentRepo = remoteNode.get() + .getAttributes() + .get(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY); + if (segmentRepo != null && translogRepo != null) { + settingsBuilder.put(SETTING_REMOTE_STORE_ENABLED, true) + .put(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, segmentRepo) + .put(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, translogRepo); + } else { + ValidationException validationException = new ValidationException(); + validationException.addValidationErrors( + Collections.singletonList("Cluster is migrating to remote store but no remote node found, failing index creation") + ); + throw new IndexCreationException(indexName, validationException); + } + } } } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index 33b182dd3cc97..94b11086ba865 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -14,6 +14,7 @@ import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.util.FeatureFlags; import org.opensearch.repositories.RepositoriesService; @@ -223,4 +224,18 @@ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode return existingRepositories; } } + + /** + * To check if the cluster is undergoing remote store migration + * @param clusterSettings cluster level settings + * @return + * true For REMOTE_STORE migration direction and MIXED compatibility mode, + * false otherwise + */ + public static boolean isMigratingToRemoteStore(ClusterSettings clusterSettings) { + boolean isMixedMode = clusterSettings.get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING).equals(CompatibilityMode.MIXED); + boolean isRemoteStoreMigrationDirection = clusterSettings.get(MIGRATION_DIRECTION_SETTING).equals(Direction.REMOTE_STORE); + + return (isMixedMode && isRemoteStoreMigrationDirection); + } } diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index 980610d337a63..b79a6a88250f8 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -92,6 +92,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.ShardLimitValidator; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; @@ -678,19 +679,27 @@ private Settings getOverrideSettingsInternal() { // We will use whatever replication strategy provided by user or from snapshot metadata unless // cluster is remote store enabled or user have restricted a specific replication type in the - // cluster. + // cluster. If cluster is undergoing remote store migration, replication strategy is strictly SEGMENT type if (RemoteStoreNodeAttribute.isRemoteStoreAttributePresent(clusterService.getSettings()) == true - || clusterSettings.get(IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING) == true) { + || clusterSettings.get(IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING) == true + || RemoteStoreNodeService.isMigratingToRemoteStore(clusterSettings) == true) { MetadataCreateIndexService.updateReplicationStrategy( settingsBuilder, request.indexSettings(), clusterService.getSettings(), - null + null, + clusterSettings ); } // remote store settings needs to be overridden if the remote store feature is enabled in the // cluster where snapshot is being restored. - MetadataCreateIndexService.updateRemoteStoreSettings(settingsBuilder, clusterService.getSettings()); + MetadataCreateIndexService.updateRemoteStoreSettings( + settingsBuilder, + clusterService.state(), + clusterSettings, + clusterService.getSettings(), + request.getDescription() + ); return settingsBuilder.build(); } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index fa71b77648d35..d3086de6ec89e 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -55,6 +55,7 @@ import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.UUIDs; import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.IndexScopedSettings; @@ -62,6 +63,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -82,6 +84,7 @@ import org.opensearch.indices.SystemIndexDescriptor; import org.opensearch.indices.SystemIndices; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.snapshots.EmptySnapshotsInfoService; import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; @@ -136,6 +139,7 @@ import static org.opensearch.cluster.metadata.MetadataCreateIndexService.getIndexNumberOfRoutingShards; import static org.opensearch.cluster.metadata.MetadataCreateIndexService.parseV1Mappings; import static org.opensearch.cluster.metadata.MetadataCreateIndexService.resolveAndValidateAliases; +import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING; import static org.opensearch.index.IndexSettings.INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; @@ -150,6 +154,8 @@ import static org.opensearch.node.Node.NODE_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.equalTo; @@ -1349,18 +1355,16 @@ public void testClusterForceReplicationTypeInValidateIndexSettings() { } public void testRemoteStoreNoUserOverrideExceptReplicationTypeSegmentIndexSettings() { - Settings settings = Settings.builder() - .put(CLUSTER_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT) - .put(segmentRepositoryNameAttributeKey, "my-segment-repo-1") - .put(translogRepositoryNameAttributeKey, "my-translog-repo-1") + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(getRemoteNode()).build()) .build(); - + Settings settings = Settings.builder().put(translogRepositoryNameAttributeKey, "my-translog-repo-1").build(); request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); final Settings.Builder requestSettings = Settings.builder(); requestSettings.put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT); request.settings(requestSettings.build()); Settings indexSettings = aggregateIndexSettings( - ClusterState.EMPTY_STATE, + clusterState, request, Settings.EMPTY, null, @@ -1376,21 +1380,20 @@ public void testRemoteStoreNoUserOverrideExceptReplicationTypeSegmentIndexSettin "my-segment-repo-1", "my-translog-repo-1", ReplicationType.SEGMENT.toString(), - IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL + null ); } public void testRemoteStoreImplicitOverrideReplicationTypeToSegmentForRemoteStore() { - Settings settings = Settings.builder() - .put(segmentRepositoryNameAttributeKey, "my-segment-repo-1") - .put(translogRepositoryNameAttributeKey, "my-translog-repo-1") + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(getRemoteNode()).build()) .build(); - + Settings settings = Settings.builder().put(translogRepositoryNameAttributeKey, "my-translog-repo-1").build(); request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); final Settings.Builder requestSettings = Settings.builder(); request.settings(requestSettings.build()); Settings indexSettings = aggregateIndexSettings( - ClusterState.EMPTY_STATE, + clusterState, request, Settings.EMPTY, null, @@ -1406,20 +1409,18 @@ public void testRemoteStoreImplicitOverrideReplicationTypeToSegmentForRemoteStor "my-segment-repo-1", "my-translog-repo-1", ReplicationType.SEGMENT.toString(), - IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL + null ); } public void testRemoteStoreNoUserOverrideIndexSettings() { - Settings settings = Settings.builder() - .put(CLUSTER_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) - .put(segmentRepositoryNameAttributeKey, "my-segment-repo-1") - .put(translogRepositoryNameAttributeKey, "my-translog-repo-1") + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(getRemoteNode()).build()) .build(); - + Settings settings = Settings.builder().put(translogRepositoryNameAttributeKey, "my-translog-repo-1").build(); request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); Settings indexSettings = aggregateIndexSettings( - ClusterState.EMPTY_STATE, + clusterState, request, Settings.EMPTY, null, @@ -1435,7 +1436,7 @@ public void testRemoteStoreNoUserOverrideIndexSettings() { "my-segment-repo-1", "my-translog-repo-1", ReplicationType.SEGMENT.toString(), - IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL + null ); } @@ -1551,6 +1552,109 @@ public void testRemoteStoreOverrideTranslogRepoIndexSettings() { })); } + public void testNewIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build()); + + // non-remote cluster manager node + DiscoveryNode nonRemoteClusterManagerNode = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(nonRemoteClusterManagerNode) + .localNodeId(nonRemoteClusterManagerNode.getId()) + .build(); + + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).nodes(discoveryNodes).build(); + + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test"); + + Settings indexSettings = aggregateIndexSettings( + clusterState, + request, + Settings.EMPTY, + null, + Settings.EMPTY, + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + randomShardLimitService(), + Collections.emptySet(), + clusterSettings + ); + verifyRemoteStoreIndexSettings(indexSettings, null, null, null, ReplicationType.DOCUMENT.toString(), null); + + // remote data node + DiscoveryNode remoteDataNode = getRemoteNode(); + + discoveryNodes = DiscoveryNodes.builder(discoveryNodes).add(remoteDataNode).localNodeId(remoteDataNode.getId()).build(); + + clusterState = ClusterState.builder(ClusterName.DEFAULT).nodes(discoveryNodes).build(); + + Settings remoteStoreMigrationSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED) + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .build(); + + clusterSettings = new ClusterSettings(remoteStoreMigrationSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + indexSettings = aggregateIndexSettings( + clusterState, + request, + Settings.EMPTY, + null, + Settings.EMPTY, + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + randomShardLimitService(), + Collections.emptySet(), + clusterSettings + ); + + verifyRemoteStoreIndexSettings( + indexSettings, + "true", + "my-segment-repo-1", + "my-translog-repo-1", + ReplicationType.SEGMENT.toString(), + null + ); + + Map missingTranslogAttribute = Map.of(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); + + DiscoveryNodes finalDiscoveryNodes = DiscoveryNodes.builder() + .add(nonRemoteClusterManagerNode) + .add( + new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + missingTranslogAttribute, + Set.of(DiscoveryNodeRole.INGEST_ROLE, DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, DiscoveryNodeRole.DATA_ROLE), + Version.CURRENT + ) + ) + .build(); + + ClusterState finalClusterState = ClusterState.builder(ClusterName.DEFAULT).nodes(finalDiscoveryNodes).build(); + ClusterSettings finalClusterSettings = clusterSettings; + + final IndexCreationException error = expectThrows(IndexCreationException.class, () -> { + aggregateIndexSettings( + finalClusterState, + request, + Settings.EMPTY, + null, + Settings.EMPTY, + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + randomShardLimitService(), + Collections.emptySet(), + finalClusterSettings + ); + }); + + assertThat( + error.getCause().getMessage(), + containsString("Cluster is migrating to remote store but no remote node found, failing index creation") + ); + } + public void testBuildIndexMetadata() { IndexMetadata sourceIndexMetadata = IndexMetadata.builder("parent") .settings(Settings.builder().put("index.version.created", Version.CURRENT).build()) @@ -2118,7 +2222,20 @@ private void verifyRemoteStoreIndexSettings( assertEquals(isRemoteSegmentEnabled, indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); assertEquals(remoteSegmentRepo, indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); assertEquals(remoteTranslogRepo, indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); - assertEquals(translogBufferInterval, INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings)); + assertEquals(translogBufferInterval, indexSettings.get(INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.getKey())); + } + + private DiscoveryNode getRemoteNode() { + Map attributes = new HashMap<>(); + attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); + attributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); + return new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + attributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); } @After diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java index 43363407d9249..3e130a42952e4 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java @@ -664,7 +664,7 @@ private DiscoveryNode getNonRemoteNode() { } // get a dummy remote node - public DiscoveryNode getRemoteNode() { + private DiscoveryNode getRemoteNode() { Map attributes = new HashMap<>(); attributes.put( REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY,