From 7197a29fecf5fd8fa1104dcbd0acc568849a5b24 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Fri, 26 Apr 2024 22:47:22 -0700 Subject: [PATCH 01/16] change cancellation logic to fix disparity bw trackers and resource duress Signed-off-by: Kaushal Kumar --- .../SearchBackpressureService.java | 219 +++++++++++------- .../stats/SearchShardTaskStats.java | 10 +- .../backpressure/stats/SearchTaskStats.java | 10 +- .../trackers/CpuUsageTracker.java | 6 +- .../trackers/ElapsedTimeTracker.java | 6 +- .../trackers/HeapUsageTracker.java | 6 +- .../trackers/NodeDuressTracker.java | 41 ---- .../trackers/NodeDuressTrackers.java | 91 ++++++++ .../trackers/TaskResourceUsageTracker.java | 63 ----- .../trackers/TaskResourceUsageTrackers.java | 162 +++++++++++++ .../opensearch/tasks/TaskCancellation.java | 8 + .../SearchBackpressureServiceTests.java | 54 +++-- .../stats/SearchShardTaskStatsTests.java | 4 +- .../stats/SearchTaskStatsTests.java | 4 +- .../trackers/NodeDuressTrackerTests.java | 15 +- .../tasks/TaskCancellationTests.java | 12 +- 16 files changed, 460 insertions(+), 251 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTracker.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java delete mode 100644 server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java create mode 100644 server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index ebf9623eb367a..6febd696b776a 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -28,9 +28,9 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.NodeDuressTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.NodeDuressTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.Task; @@ -42,12 +42,7 @@ import org.opensearch.threadpool.ThreadPool; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import java.util.function.DoubleSupplier; import java.util.function.LongSupplier; import java.util.stream.Collectors; @@ -70,8 +65,8 @@ public class SearchBackpressureService extends AbstractLifecycleComponent implem private final ThreadPool threadPool; private final LongSupplier timeNanosSupplier; - private final List nodeDuressTrackers; - private final Map, List> taskTrackers; + private final NodeDuressTrackers nodeDuressTrackers; + private final Map, TaskResourceUsageTrackers> taskTrackers; private final Map, SearchBackpressureState> searchBackpressureStates; private final TaskManager taskManager; @@ -87,12 +82,14 @@ public SearchBackpressureService( taskResourceTrackingService, threadPool, System::nanoTime, - List.of( - new NodeDuressTracker( - () -> ProcessProbe.getInstance().getProcessCpuPercent() / 100.0 >= settings.getNodeDuressSettings().getCpuThreshold() + new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker( + () -> ProcessProbe.getInstance().getProcessCpuPercent() / 100.0 >= settings.getNodeDuressSettings().getCpuThreshold(), + () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() ), - new NodeDuressTracker( - () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold() + new NodeDuressTrackers.NodeDuressTracker( + () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold(), + () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() ) ), getTrackers( @@ -122,9 +119,9 @@ public SearchBackpressureService( TaskResourceTrackingService taskResourceTrackingService, ThreadPool threadPool, LongSupplier timeNanosSupplier, - List nodeDuressTrackers, - List searchTaskTrackers, - List searchShardTaskTrackers, + NodeDuressTrackers nodeDuressTrackers, + TaskResourceUsageTrackers searchTaskTrackers, + TaskResourceUsageTrackers searchShardTaskTrackers, TaskManager taskManager ) { this.settings = settings; @@ -163,7 +160,7 @@ void doRun() { return; } - if (isNodeInDuress() == false) { + if (nodeDuressTrackers.isNodeInDuress() == false) { return; } @@ -175,28 +172,40 @@ void doRun() { taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); taskResourceTrackingService.refreshResourceStats(searchShardTasks.toArray(new Task[0])); - // Check if increase in heap usage is due to SearchTasks - if (HeapUsageTracker.isHeapUsageDominatedBySearch( - searchTasks, - getSettings().getSearchTaskSettings().getTotalHeapPercentThreshold() - )) { - cancellableTasks.addAll(searchTasks); - } + List taskCancellations = new ArrayList<>(); - // Check if increase in heap usage is due to SearchShardTasks - if (HeapUsageTracker.isHeapUsageDominatedBySearch( - searchShardTasks, - getSettings().getSearchShardTaskSettings().getTotalHeapPercentThreshold() - )) { - cancellableTasks.addAll(searchShardTasks); - } + addHeapBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + + addCPUBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + + addElapsedTimeBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + + // Since these cancellations might be duplicate due to multiple trackers causing cancellation for same task + // We need to merge them + taskCancellations = mergeTaskCancellations(taskCancellations); + +// // Check if increase in heap usage is due to SearchTasks +// if (HeapUsageTracker.isHeapUsageDominatedBySearch( +// searchTasks, +// getSettings().getSearchTaskSettings().getTotalHeapPercentThreshold() +// )) { +// cancellableTasks.addAll(searchTasks); +// } +// +// // Check if increase in heap usage is due to SearchShardTasks +// if (HeapUsageTracker.isHeapUsageDominatedBySearch( +// searchShardTasks, +// getSettings().getSearchShardTaskSettings().getTotalHeapPercentThreshold() +// )) { +// cancellableTasks.addAll(searchShardTasks); +// } // none of the task type is breaching the heap usage thresholds and hence we do not cancel any tasks - if (cancellableTasks.isEmpty()) { - return; - } +// if (taskCancellations.isEmpty()) { +// return; +// } - for (TaskCancellation taskCancellation : getTaskCancellations(cancellableTasks)) { + for (TaskCancellation taskCancellation : taskCancellations) { logger.warn( "[{} mode] cancelling task [{}] due to high resource consumption [{}]", mode.getName(), @@ -226,6 +235,78 @@ void doRun() { } } + private void addElapsedTimeBasedTaskCancellations(List taskCancellations, List searchTasks, List searchShardTasks) { + final TaskResourceUsageTrackers.TaskResourceUsageTracker searchTaskElapsedTimeTracker = getTaskResourceUsageTrackersByType(SearchTask.class).getElapsedTimeTracker(); + final TaskResourceUsageTrackers.TaskResourceUsageTracker searchShardTaskElapsedTimeTracker = getTaskResourceUsageTrackersByType(SearchShardTask.class).getElapsedTimeTracker(); + + taskCancellations.addAll( + searchTaskElapsedTimeTracker.getTaskCancellations(searchTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount) + ); + + taskCancellations.addAll( + searchShardTaskElapsedTimeTracker.getTaskCancellations(searchShardTasks, searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount) + ); + } + + private void addCPUBasedTaskCancellations(List taskCancellations, List searchTasks, List searchShardTasks) { + if (nodeDuressTrackers.isCPUInDuress()) { + final TaskResourceUsageTrackers.TaskResourceUsageTracker searchTaskCPUUsageTracker = getTaskResourceUsageTrackersByType(SearchTask.class).getCpuUsageTracker(); + final TaskResourceUsageTrackers.TaskResourceUsageTracker searchShardTaskCPUUsageTracker = getTaskResourceUsageTrackersByType(SearchShardTask.class).getCpuUsageTracker(); + + taskCancellations.addAll( + searchTaskCPUUsageTracker + .getTaskCancellations(searchTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount) + ); + + taskCancellations.addAll( + searchShardTaskCPUUsageTracker + .getTaskCancellations(searchShardTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount) + ); + } + } + + private void addHeapBasedTaskCancellations(List taskCancellations, List searchTasks, List searchShardTasks) { + if (isHeapTrackingSupported() && nodeDuressTrackers.isHeapInDuress()) { + final TaskResourceUsageTrackers.TaskResourceUsageTracker searchTaskHeapUsageTracker = getTaskResourceUsageTrackersByType(SearchTask.class).getHeapUsageTracker(); + final TaskResourceUsageTrackers.TaskResourceUsageTracker searchShardTaskHeapUsageTracker = getTaskResourceUsageTrackersByType(SearchShardTask.class).getHeapUsageTracker(); + + taskCancellations = searchTaskHeapUsageTracker + .getTaskCancellations(searchTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); + + taskCancellations.addAll( + searchShardTaskHeapUsageTracker + .getTaskCancellations(searchShardTasks, searchBackpressureStates.get(SearchShardTask.class)::incrementCompletionCount) + ); + } + } + + + /** + * returns the taskTrackers for given type + * @param type + * @return + */ + private TaskResourceUsageTrackers getTaskResourceUsageTrackersByType(Class type) { + return taskTrackers.get(type); + } + + + /** + * Method to reduce the taskCancellations into unique bunch + * @param taskCancellations + * @return + */ + private List mergeTaskCancellations(final List taskCancellations) { + final Map uniqueTaskCancellations = new HashMap<>(); + + for (TaskCancellation taskCancellation: taskCancellations) { + final long taskId = taskCancellation.getTask().getId(); + uniqueTaskCancellations.put(taskId, + uniqueTaskCancellations.getOrDefault(taskId, taskCancellation).merge(taskCancellation)); + } + + return new ArrayList<>(uniqueTaskCancellations.values()); + } /** * Given a task, returns the type of the task */ @@ -243,16 +324,7 @@ Class getTaskType(Task task) { * Returns true if the node is in duress consecutively for the past 'n' observations. */ boolean isNodeInDuress() { - boolean isNodeInDuress = false; - int numSuccessiveBreaches = getSettings().getNodeDuressSettings().getNumSuccessiveBreaches(); - - for (NodeDuressTracker tracker : nodeDuressTrackers) { - if (tracker.check() >= numSuccessiveBreaches) { - isNodeInDuress = true; // not breaking the loop so that each tracker's streak gets updated. - } - } - - return isNodeInDuress; + return nodeDuressTrackers.isNodeInDuress(); } /* @@ -271,39 +343,6 @@ List getTa .collect(Collectors.toUnmodifiableList()); } - /** - * Returns a TaskCancellation wrapper containing the list of reasons (possibly zero), along with an overall - * cancellation score for the given task. Cancelling a task with a higher score has better chance of recovering the - * node from duress. - */ - TaskCancellation getTaskCancellation(CancellableTask task) { - List reasons = new ArrayList<>(); - List callbacks = new ArrayList<>(); - Class taskType = getTaskType(task); - List trackers = taskTrackers.get(taskType); - for (TaskResourceUsageTracker tracker : trackers) { - Optional reason = tracker.checkAndMaybeGetCancellationReason(task); - if (reason.isPresent()) { - callbacks.add(tracker::incrementCancellations); - reasons.add(reason.get()); - } - } - callbacks.add(searchBackpressureStates.get(taskType)::incrementCancellationCount); - - return new TaskCancellation(task, reasons, callbacks); - } - - /** - * Returns a list of TaskCancellations sorted by descending order of their cancellation scores. - */ - List getTaskCancellations(List tasks) { - return tasks.stream() - .map(this::getTaskCancellation) - .filter(TaskCancellation::isEligibleForCancellation) - .sorted(Comparator.reverseOrder()) - .collect(Collectors.toUnmodifiableList()); - } - SearchBackpressureSettings getSettings() { return settings; } @@ -315,7 +354,7 @@ SearchBackpressureState getSearchBackpressureState(Class getTrackers( + public static TaskResourceUsageTrackers getTrackers( LongSupplier cpuThresholdSupplier, DoubleSupplier heapVarianceSupplier, DoubleSupplier heapPercentThresholdSupplier, @@ -324,10 +363,10 @@ public static List getTrackers( ClusterSettings clusterSettings, Setting windowSizeSetting ) { - List trackers = new ArrayList<>(); - trackers.add(new CpuUsageTracker(cpuThresholdSupplier)); + TaskResourceUsageTrackers trackers = new TaskResourceUsageTrackers(); + trackers.addCpuUsageTracker(new CpuUsageTracker(cpuThresholdSupplier)); if (isHeapTrackingSupported()) { - trackers.add( + trackers.addHeapUsageTracker( new HeapUsageTracker( heapVarianceSupplier, heapPercentThresholdSupplier, @@ -339,8 +378,8 @@ public static List getTrackers( } else { logger.warn("heap size couldn't be determined"); } - trackers.add(new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime)); - return Collections.unmodifiableList(trackers); + trackers.addElapsedTimeTracker(new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime)); + return trackers; } @Override @@ -360,8 +399,8 @@ public void onTaskCompleted(Task task) { } List exceptions = new ArrayList<>(); - List trackers = taskTrackers.get(taskType); - for (TaskResourceUsageTracker tracker : trackers) { + TaskResourceUsageTrackers trackers = getTaskResourceUsageTrackersByType(taskType); + for (TaskResourceUsageTrackers.TaskResourceUsageTracker tracker : trackers.all()) { try { tracker.update(task); } catch (Exception e) { @@ -400,7 +439,7 @@ public SearchBackpressureStats nodeStats() { searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), searchBackpressureStates.get(SearchTask.class).getCompletionCount(), - taskTrackers.get(SearchTask.class) + getTaskResourceUsageTrackersByType(SearchTask.class).all() .stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); @@ -409,7 +448,7 @@ public SearchBackpressureStats nodeStats() { searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), searchBackpressureStates.get(SearchShardTask.class).getCompletionCount(), - taskTrackers.get(SearchShardTask.class) + getTaskResourceUsageTrackersByType(SearchShardTask.class).all() .stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java index ffe97d125b27a..5bd8fc2f18025 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java @@ -18,7 +18,7 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import java.io.IOException; @@ -32,13 +32,13 @@ public class SearchShardTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final long completionCount; - private final Map resourceUsageTrackerStats; + private final Map resourceUsageTrackerStats; public SearchShardTaskStats( long cancellationCount, long limitReachedCount, long completionCount, - Map resourceUsageTrackerStats + Map resourceUsageTrackerStats ) { this.cancellationCount = cancellationCount; this.limitReachedCount = limitReachedCount; @@ -55,7 +55,7 @@ public SearchShardTaskStats(StreamInput in) throws IOException { completionCount = -1; } - MapBuilder builder = new MapBuilder<>(); + MapBuilder builder = new MapBuilder<>(); builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); @@ -67,7 +67,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java index a7f9b4e3d004f..37d62da635677 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -18,8 +18,8 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import java.io.IOException; import java.util.Map; @@ -33,13 +33,13 @@ public class SearchTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final long completionCount; - private final Map resourceUsageTrackerStats; + private final Map resourceUsageTrackerStats; public SearchTaskStats( long cancellationCount, long limitReachedCount, long completionCount, - Map resourceUsageTrackerStats + Map resourceUsageTrackerStats ) { this.cancellationCount = cancellationCount; this.limitReachedCount = limitReachedCount; @@ -56,7 +56,7 @@ public SearchTaskStats(StreamInput in) throws IOException { this.completionCount = -1; } - MapBuilder builder = new MapBuilder<>(); + MapBuilder builder = new MapBuilder<>(); builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); @@ -68,7 +68,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 507953cb4a20e..e26e3065aa393 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -29,7 +29,7 @@ * * @opensearch.internal */ -public class CpuUsageTracker extends TaskResourceUsageTracker { +public class CpuUsageTracker extends TaskResourceUsageTrackers.TaskResourceUsageTracker { private final LongSupplier thresholdSupplier; @@ -64,7 +64,7 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); @@ -73,7 +73,7 @@ public TaskResourceUsageTracker.Stats stats(List activeTasks) { /** * Stats related to CpuUsageTracker. */ - public static class Stats implements TaskResourceUsageTracker.Stats { + public static class Stats implements TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats { private final long cancellationCount; private final long currentMax; private final long currentAvg; diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index f1e8abe7e3230..a9e7925f3a81e 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -29,7 +29,7 @@ * * @opensearch.internal */ -public class ElapsedTimeTracker extends TaskResourceUsageTracker { +public class ElapsedTimeTracker extends TaskResourceUsageTrackers.TaskResourceUsageTracker { private final LongSupplier thresholdSupplier; private final LongSupplier timeNanosSupplier; @@ -65,7 +65,7 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); @@ -75,7 +75,7 @@ public TaskResourceUsageTracker.Stats stats(List activeTasks) { /** * Stats related to ElapsedTimeTracker. */ - public static class Stats implements TaskResourceUsageTracker.Stats { + public static class Stats implements TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats { private final long cancellationCount; private final long currentMax; private final long currentAvg; diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index 56b9f947f6e37..d966f40b9c78a 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -37,7 +37,7 @@ * * @opensearch.internal */ -public class HeapUsageTracker extends TaskResourceUsageTracker { +public class HeapUsageTracker extends TaskResourceUsageTrackers.TaskResourceUsageTracker { private static final Logger logger = LogManager.getLogger(HeapUsageTracker.class); private static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); private final DoubleSupplier heapVarianceSupplier; @@ -117,7 +117,7 @@ public static boolean isHeapUsageDominatedBySearch(List cancell } @Override - public TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); @@ -126,7 +126,7 @@ public TaskResourceUsageTracker.Stats stats(List activeTasks) { /** * Stats related to HeapUsageTracker. */ - public static class Stats implements TaskResourceUsageTracker.Stats { + public static class Stats implements TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats { private final long cancellationCount; private final long currentMax; private final long currentAvg; diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTracker.java deleted file mode 100644 index 8e35c724a8fef..0000000000000 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTracker.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.backpressure.trackers; - -import org.opensearch.common.util.Streak; - -import java.util.function.BooleanSupplier; - -/** - * NodeDuressTracker is used to check if the node is in duress. - * - * @opensearch.internal - */ -public class NodeDuressTracker { - /** - * Tracks the number of consecutive breaches. - */ - private final Streak breaches = new Streak(); - - /** - * Predicate that returns true when the node is in duress. - */ - private final BooleanSupplier isNodeInDuress; - - public NodeDuressTracker(BooleanSupplier isNodeInDuress) { - this.isNodeInDuress = isNodeInDuress; - } - - /** - * Evaluates the predicate and returns the number of consecutive breaches. - */ - public int check() { - return breaches.record(isNodeInDuress.getAsBoolean()); - } -} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java new file mode 100644 index 0000000000000..ef22d8e5b7249 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java @@ -0,0 +1,91 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.trackers; + +import org.opensearch.common.util.Streak; + +import java.util.function.BooleanSupplier; +import java.util.function.IntSupplier; + +/** + * NodeDuressTrackers is used to check if the node is in duress based on various resources. + * + * @opensearch.internal + */ +public class NodeDuressTrackers { + + private final NodeDuressTracker heapDuressTracker; + private final NodeDuressTracker cpuDuressTracker; + + public NodeDuressTrackers(final NodeDuressTracker heapDuressTracker, final NodeDuressTracker cpuDuressTracker) { + this.heapDuressTracker = heapDuressTracker; + this.cpuDuressTracker = cpuDuressTracker; + } + + + /** + * Method to check the heap duress + * @return true if heap is in duress + */ + public boolean isHeapInDuress() { + return heapDuressTracker.test(); + } + + + /** + * Method to check the CPU duress + * @return true if cpu is in duress + */ + public boolean isCPUInDuress() { + return cpuDuressTracker.test(); + } + + /** + * Method to evaluate whether the node is in duress or not + * @return true if node is in duress because of either system resource + */ + public boolean isNodeInDuress() { + return isCPUInDuress() || isHeapInDuress(); + } + + + /** + * NodeDuressTracker is used to check if the node is in duress + * @opensearch.internal + */ + public static class NodeDuressTracker { + /** + * Tracks the number of consecutive breaches. + */ + private final Streak breaches = new Streak(); + + /** + * Predicate that returns true when the node is in duress. + */ + private final BooleanSupplier isNodeInDuress; + + /** + * Predicate that returns the max number of breaches allowed for this resource before we mark it as in duress + */ + private final IntSupplier maxBreachAllowedSupplier; + + public NodeDuressTracker(BooleanSupplier isNodeInDuress, + IntSupplier maxBreachAllowedSupplier) { + this.isNodeInDuress = isNodeInDuress; + this.maxBreachAllowedSupplier = maxBreachAllowedSupplier; + } + + /** + * Returns true if the node is in duress consecutively for the past 'n' observations. + */ + public boolean test() { + return breaches.record(isNodeInDuress.getAsBoolean()) >= maxBreachAllowedSupplier.getAsInt(); + } + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java deleted file mode 100644 index ce15e9e9b6622..0000000000000 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTracker.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.backpressure.trackers; - -import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.xcontent.ToXContentObject; -import org.opensearch.tasks.Task; -import org.opensearch.tasks.TaskCancellation; - -import java.util.List; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicLong; - -/** - * TaskResourceUsageTracker is used to track completions and cancellations of search related tasks. - * - * @opensearch.internal - */ -public abstract class TaskResourceUsageTracker { - /** - * Counts the number of cancellations made due to this tracker. - */ - private final AtomicLong cancellations = new AtomicLong(); - - public long incrementCancellations() { - return cancellations.incrementAndGet(); - } - - public long getCancellations() { - return cancellations.get(); - } - - /** - * Returns a unique name for this tracker. - */ - public abstract String name(); - - /** - * Notifies the tracker to update its state when a task execution completes. - */ - public void update(Task task) {} - - /** - * Returns the cancellation reason for the given task, if it's eligible for cancellation. - */ - public abstract Optional checkAndMaybeGetCancellationReason(Task task); - - /** - * Returns the tracker's state for tasks as seen in the stats API. - */ - public abstract Stats stats(List activeTasks); - - /** - * Represents the tracker's state as seen in the stats API. - */ - public interface Stats extends ToXContentObject, Writeable {} -} diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java new file mode 100644 index 0000000000000..9750a31af3229 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -0,0 +1,162 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.trackers; + +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskCancellation; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +/** + * TaskResourceUsageTrackers is used to hold all the {@link TaskResourceUsageTracker} objects. + * + * @opensearch.internal + */ +public class TaskResourceUsageTrackers { + private TaskResourceUsageTracker cpuUsageTracker; + private TaskResourceUsageTracker heapUsageTracker; + private TaskResourceUsageTracker elapsedTimeTracker; + + public TaskResourceUsageTrackers() { } + + + /** + * adds the cpuUsageTracker + * @param cpuUsageTracker + */ + public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { + this.cpuUsageTracker = cpuUsageTracker; + } + + + /** + * adds the heapUsageTracker + * @param heapUsageTracker + */ + public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) { + this.heapUsageTracker = heapUsageTracker; + } + + + /** + * adds the elapsedTimeTracker + * @param elapsedTimeTracker + */ + public void addElapsedTimeTracker(final TaskResourceUsageTracker elapsedTimeTracker) { + this.elapsedTimeTracker = elapsedTimeTracker; + } + + + /** + * getter for cpuUsageTracker + * @return + */ + public TaskResourceUsageTracker getCpuUsageTracker() { + return cpuUsageTracker; + } + + + /** + * getter for heapUsageTacker + * @return + */ + public TaskResourceUsageTracker getHeapUsageTracker() { + return heapUsageTracker; + } + + + /** + * getter for elapsedTimeTracker + * @return + */ + public TaskResourceUsageTracker getElapsedTimeTracker() { + return elapsedTimeTracker; + } + + + /** + * Method to access all available {@link TaskResourceUsageTracker} + * @return + */ + public List all() { + return List.of(heapUsageTracker, cpuUsageTracker, elapsedTimeTracker); + } + + /** + * TaskResourceUsageTracker is used to track completions and cancellations of search related tasks. + * @opensearch.internal + */ + public static abstract class TaskResourceUsageTracker { + /** + * Counts the number of cancellations made due to this tracker. + */ + private final AtomicLong cancellations = new AtomicLong(); + + public long incrementCancellations() { + return cancellations.incrementAndGet(); + } + + public long getCancellations() { + return cancellations.get(); + } + + /** + * Returns a unique name for this tracker. + */ + public abstract String name(); + + /** + * Notifies the tracker to update its state when a task execution completes. + */ + public void update(Task task) {} + + /** + * Returns the cancellation reason for the given task, if it's eligible for cancellation. + */ + public abstract Optional checkAndMaybeGetCancellationReason(Task task); + + /** + * Returns the tracker's state for tasks as seen in the stats API. + */ + public abstract Stats stats(List activeTasks); + + + /** + * Method to get taskCancellations due to this tracker for the given {@link CancellableTask} tasks + * @param tasks + * @param cancellationCallback + * @return + */ + public List getTaskCancellations(List tasks, Runnable cancellationCallback) { + return tasks.stream().map( + task -> this.getTaskCancellation(task, cancellationCallback) + ).collect(Collectors.toList()); + } + + private TaskCancellation getTaskCancellation(final CancellableTask task, final Runnable cancellationCallback) { + Optional reason = checkAndMaybeGetCancellationReason(task); + List reasons = new ArrayList<>(); + reason.ifPresent(reasons::add); + + return new TaskCancellation(task, reasons, List.of(cancellationCallback)); + } + + /** + * Represents the tracker's state as seen in the stats API. + */ + public interface Stats extends ToXContentObject, Writeable {} + } +} diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java index 2d152e513f197..5f7daac42b973 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java @@ -46,6 +46,14 @@ public String getReasonString() { return reasons.stream().map(Reason::getMessage).collect(Collectors.joining(", ")); } + public TaskCancellation merge(final TaskCancellation other) { + final List newReasons = new ArrayList<>(reasons); + reasons.addAll(other.getReasons()); + final List newOnCancelCallbacks = new ArrayList<>(onCancelCallbacks); + newOnCancelCallbacks.addAll(other.onCancelCallbacks); + return new TaskCancellation(task, newReasons, newOnCancelCallbacks); + } + /** * Cancels the task and invokes all onCancelCallbacks. */ diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 9778798b706f4..396316c9dec47 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -24,8 +24,10 @@ import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTracker; +import org.opensearch.search.backpressure.trackers.NodeDuressTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -89,8 +91,11 @@ public void testIsNodeInDuress() { AtomicReference cpuUsage = new AtomicReference<>(); AtomicReference heapUsage = new AtomicReference<>(); - NodeDuressTracker cpuUsageTracker = new NodeDuressTracker(() -> cpuUsage.get() >= 0.5); - NodeDuressTracker heapUsageTracker = new NodeDuressTracker(() -> heapUsage.get() >= 0.5); + NodeDuressTrackers.NodeDuressTracker cpuUsageTracker = new NodeDuressTrackers.NodeDuressTracker( + () -> cpuUsage.get() >= 0.5, + () -> 3); + NodeDuressTrackers.NodeDuressTracker heapUsageTracker = new NodeDuressTrackers.NodeDuressTracker(() -> heapUsage.get() >= 0.5, + () -> 3); SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -102,9 +107,9 @@ public void testIsNodeInDuress() { mockTaskResourceTrackingService, threadPool, System::nanoTime, - List.of(cpuUsageTracker, heapUsageTracker), - Collections.emptyList(), - Collections.emptyList(), + new NodeDuressTrackers(heapUsageTracker, cpuUsageTracker), + new TaskResourceUsageTrackers(), + new TaskResourceUsageTrackers(), taskManager ); @@ -131,7 +136,9 @@ public void testIsNodeInDuress() { public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); - TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTrackers.TaskResourceUsageTracker.class); + TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); + taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -143,9 +150,9 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - Collections.emptyList(), - List.of(mockTaskResourceUsageTracker), - Collections.emptyList(), + new NodeDuressTrackers(null, null), + taskResourceUsageTrackers, + new TaskResourceUsageTrackers(), taskManager ); @@ -160,7 +167,9 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { public void testTrackerStateUpdateOnSearchShardTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); - TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTrackers.TaskResourceUsageTracker.class); + TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); + taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -172,9 +181,9 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - Collections.emptyList(), - Collections.emptyList(), - List.of(mockTaskResourceUsageTracker), + new NodeDuressTrackers(null, null), + new TaskResourceUsageTrackers(), + taskResourceUsageTrackers, taskManager ); @@ -192,9 +201,12 @@ public void testSearchTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); + NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker( + () -> true, () -> 3); - TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); + TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); + taskResourceUsageTrackers.addHeapUsageTracker(mockTaskResourceUsageTracker); // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 5.0); @@ -204,9 +216,9 @@ public void testSearchTaskInFlightCancellation() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - List.of(mockNodeDuressTracker), - List.of(mockTaskResourceUsageTracker), - Collections.emptyList(), + new NodeDuressTrackers(mockNodeDuressTracker, null), + taskResourceUsageTrackers, + new TaskResourceUsageTrackers(), mockTaskManager ); @@ -344,8 +356,8 @@ private SearchBackpressureSettings getBackpressureSettings(String mode, double r ); } - private TaskResourceUsageTracker getMockedTaskResourceUsageTracker() { - return new TaskResourceUsageTracker() { + private TaskResourceUsageTrackers.TaskResourceUsageTracker getMockedTaskResourceUsageTracker() { + return new TaskResourceUsageTrackers.TaskResourceUsageTracker() { @Override public String name() { return TaskResourceUsageTrackerType.CPU_USAGE_TRACKER.getName(); @@ -370,7 +382,7 @@ public Stats stats(List tasks) { }; } - private static class MockStats implements TaskResourceUsageTracker.Stats { + private static class MockStats implements TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats { private final long cancellationCount; public MockStats(long cancellationCount) { diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java index f28b82cad30d3..d1b5c9cfa4944 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java @@ -12,7 +12,7 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.test.AbstractWireSerializingTestCase; @@ -30,7 +30,7 @@ protected SearchShardTaskStats createTestInstance() { } public static SearchShardTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( + Map resourceUsageTrackerStats = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java index cc7aa92826b41..b2cd3a7567ab8 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -12,7 +12,7 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.test.AbstractWireSerializingTestCase; @@ -31,7 +31,7 @@ protected SearchTaskStats createTestInstance() { } public static SearchTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( + Map resourceUsageTrackerStats = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java index 472ba95566523..ba84f76cd1c9b 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java @@ -16,20 +16,21 @@ public class NodeDuressTrackerTests extends OpenSearchTestCase { public void testNodeDuressTracker() { AtomicReference cpuUsage = new AtomicReference<>(0.0); - NodeDuressTracker tracker = new NodeDuressTracker(() -> cpuUsage.get() >= 0.5); + NodeDuressTrackers.NodeDuressTracker tracker = new NodeDuressTrackers.NodeDuressTracker(() -> cpuUsage.get() >= 0.5, + () -> 3); // Node not in duress. - assertEquals(0, tracker.check()); + assertFalse(tracker.test()); // Node in duress; the streak must keep increasing. cpuUsage.set(0.7); - assertEquals(1, tracker.check()); - assertEquals(2, tracker.check()); - assertEquals(3, tracker.check()); + assertFalse(tracker.test()); + assertFalse(tracker.test()); + assertTrue(tracker.test()); // Node not in duress anymore. cpuUsage.set(0.3); - assertEquals(0, tracker.check()); - assertEquals(0, tracker.check()); + assertFalse(tracker.test()); + assertFalse(tracker.test()); } } diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index e74f89c905499..d501148c17a76 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -9,7 +9,7 @@ package org.opensearch.tasks; import org.opensearch.action.search.SearchShardTask; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; @@ -22,9 +22,9 @@ public class TaskCancellationTests extends OpenSearchTestCase { public void testTaskCancellation() { SearchShardTask mockTask = new SearchShardTask(123L, "", "", "", null, Collections.emptyMap()); - TaskResourceUsageTracker mockTracker1 = createMockTaskResourceUsageTracker("mock_tracker_1"); - TaskResourceUsageTracker mockTracker2 = createMockTaskResourceUsageTracker("mock_tracker_2"); - TaskResourceUsageTracker mockTracker3 = createMockTaskResourceUsageTracker("mock_tracker_3"); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTracker1 = createMockTaskResourceUsageTracker("mock_tracker_1"); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTracker2 = createMockTaskResourceUsageTracker("mock_tracker_2"); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTracker3 = createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); @@ -53,8 +53,8 @@ public void testTaskCancellation() { assertEquals(0, mockTracker3.getCancellations()); } - private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { - return new TaskResourceUsageTracker() { + private static TaskResourceUsageTrackers.TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { + return new TaskResourceUsageTrackers.TaskResourceUsageTracker() { @Override public String name() { return name; From 14b220cf62ba72a31c197ad6786e63aa4054c6ce Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Mon, 29 Apr 2024 22:00:43 -0700 Subject: [PATCH 02/16] add additional tests for searchBackpressureService and refactor code Signed-off-by: Kaushal Kumar --- .../SearchBackpressureService.java | 142 ++++++++------ .../stats/SearchShardTaskStats.java | 6 +- .../backpressure/stats/SearchTaskStats.java | 4 +- .../trackers/CpuUsageTracker.java | 44 +++-- .../trackers/ElapsedTimeTracker.java | 48 +++-- .../trackers/HeapUsageTracker.java | 64 +++--- .../trackers/NodeDuressTrackers.java | 6 +- .../trackers/TaskResourceUsageTrackers.java | 67 +++++-- .../opensearch/tasks/TaskCancellation.java | 9 +- .../SearchBackpressureServiceTests.java | 182 +++++++++++++++--- .../stats/SearchShardTaskStatsTests.java | 2 +- .../stats/SearchTaskStatsTests.java | 2 +- .../trackers/CpuUsageTrackerTests.java | 6 +- .../trackers/ElapsedTimeTrackerTests.java | 4 +- .../trackers/HeapUsageTrackerTests.java | 21 +- .../trackers/NodeDuressTrackerTests.java | 3 +- .../trackers/NodeDuressTrackersTests.java | 53 +++++ .../SearchBackpressureTestHelpers.java | 8 +- 18 files changed, 459 insertions(+), 212 deletions(-) create mode 100644 server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 6febd696b776a..2772f6343616b 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -42,7 +42,11 @@ import org.opensearch.threadpool.ThreadPool; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.function.DoubleSupplier; import java.util.function.LongSupplier; import java.util.stream.Collectors; @@ -166,7 +170,6 @@ void doRun() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); - List cancellableTasks = new ArrayList<>(); // Force-refresh usage stats of these tasks before making a cancellation decision. taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); @@ -174,36 +177,17 @@ void doRun() { List taskCancellations = new ArrayList<>(); - addHeapBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + taskCancellations = addHeapBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); - addCPUBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + taskCancellations = addCPUBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); - addElapsedTimeBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + taskCancellations = addElapsedTimeBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); // Since these cancellations might be duplicate due to multiple trackers causing cancellation for same task // We need to merge them - taskCancellations = mergeTaskCancellations(taskCancellations); - -// // Check if increase in heap usage is due to SearchTasks -// if (HeapUsageTracker.isHeapUsageDominatedBySearch( -// searchTasks, -// getSettings().getSearchTaskSettings().getTotalHeapPercentThreshold() -// )) { -// cancellableTasks.addAll(searchTasks); -// } -// -// // Check if increase in heap usage is due to SearchShardTasks -// if (HeapUsageTracker.isHeapUsageDominatedBySearch( -// searchShardTasks, -// getSettings().getSearchShardTaskSettings().getTotalHeapPercentThreshold() -// )) { -// cancellableTasks.addAll(searchShardTasks); -// } - - // none of the task type is breaching the heap usage thresholds and hence we do not cancel any tasks -// if (taskCancellations.isEmpty()) { -// return; -// } + taskCancellations = mergeTaskCancellations(taskCancellations).stream() + .filter(TaskCancellation::isEligibleForCancellation) + .collect(Collectors.toList()); for (TaskCancellation taskCancellation : taskCancellations) { logger.warn( @@ -235,51 +219,86 @@ void doRun() { } } - private void addElapsedTimeBasedTaskCancellations(List taskCancellations, List searchTasks, List searchShardTasks) { - final TaskResourceUsageTrackers.TaskResourceUsageTracker searchTaskElapsedTimeTracker = getTaskResourceUsageTrackersByType(SearchTask.class).getElapsedTimeTracker(); - final TaskResourceUsageTrackers.TaskResourceUsageTracker searchShardTaskElapsedTimeTracker = getTaskResourceUsageTrackersByType(SearchShardTask.class).getElapsedTimeTracker(); - - taskCancellations.addAll( - searchTaskElapsedTimeTracker.getTaskCancellations(searchTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount) + private List addElapsedTimeBasedTaskCancellations( + List taskCancellations, + List searchTasks, + List searchShardTasks + ) { + final Optional searchTaskElapsedTimeTracker = + getTaskResourceUsageTrackersByType(SearchTask.class).getElapsedTimeTracker(); + final Optional searchShardTaskElapsedTimeTracker = + getTaskResourceUsageTrackersByType(SearchShardTask.class).getElapsedTimeTracker(); + + addTaskCancellationsFromTaskResourceUsageTracker(taskCancellations, searchTasks, searchTaskElapsedTimeTracker, SearchTask.class); + + addTaskCancellationsFromTaskResourceUsageTracker( + taskCancellations, + searchShardTasks, + searchShardTaskElapsedTimeTracker, + SearchShardTask.class ); - taskCancellations.addAll( - searchShardTaskElapsedTimeTracker.getTaskCancellations(searchShardTasks, searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount) - ); + return taskCancellations; } - private void addCPUBasedTaskCancellations(List taskCancellations, List searchTasks, List searchShardTasks) { + private List addCPUBasedTaskCancellations( + List taskCancellations, + List searchTasks, + List searchShardTasks + ) { if (nodeDuressTrackers.isCPUInDuress()) { - final TaskResourceUsageTrackers.TaskResourceUsageTracker searchTaskCPUUsageTracker = getTaskResourceUsageTrackersByType(SearchTask.class).getCpuUsageTracker(); - final TaskResourceUsageTrackers.TaskResourceUsageTracker searchShardTaskCPUUsageTracker = getTaskResourceUsageTrackersByType(SearchShardTask.class).getCpuUsageTracker(); - - taskCancellations.addAll( - searchTaskCPUUsageTracker - .getTaskCancellations(searchTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount) - ); - - taskCancellations.addAll( - searchShardTaskCPUUsageTracker - .getTaskCancellations(searchShardTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount) + final Optional searchTaskCPUUsageTracker = + getTaskResourceUsageTrackersByType(SearchTask.class).getCpuUsageTracker(); + final Optional searchShardTaskCPUUsageTracker = + getTaskResourceUsageTrackersByType(SearchShardTask.class).getCpuUsageTracker(); + + addTaskCancellationsFromTaskResourceUsageTracker(taskCancellations, searchTasks, searchTaskCPUUsageTracker, SearchTask.class); + + addTaskCancellationsFromTaskResourceUsageTracker( + taskCancellations, + searchShardTasks, + searchShardTaskCPUUsageTracker, + SearchShardTask.class ); } + return taskCancellations; } - private void addHeapBasedTaskCancellations(List taskCancellations, List searchTasks, List searchShardTasks) { + private List addHeapBasedTaskCancellations( + List taskCancellations, + List searchTasks, + List searchShardTasks + ) { if (isHeapTrackingSupported() && nodeDuressTrackers.isHeapInDuress()) { - final TaskResourceUsageTrackers.TaskResourceUsageTracker searchTaskHeapUsageTracker = getTaskResourceUsageTrackersByType(SearchTask.class).getHeapUsageTracker(); - final TaskResourceUsageTrackers.TaskResourceUsageTracker searchShardTaskHeapUsageTracker = getTaskResourceUsageTrackersByType(SearchShardTask.class).getHeapUsageTracker(); - - taskCancellations = searchTaskHeapUsageTracker - .getTaskCancellations(searchTasks, searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount); - - taskCancellations.addAll( - searchShardTaskHeapUsageTracker - .getTaskCancellations(searchShardTasks, searchBackpressureStates.get(SearchShardTask.class)::incrementCompletionCount) + final Optional searchTaskHeapUsageTracker = + getTaskResourceUsageTrackersByType(SearchTask.class).getHeapUsageTracker(); + final Optional searchShardTaskHeapUsageTracker = + getTaskResourceUsageTrackersByType(SearchShardTask.class).getHeapUsageTracker(); + + addTaskCancellationsFromTaskResourceUsageTracker(taskCancellations, searchTasks, searchTaskHeapUsageTracker, SearchTask.class); + + addTaskCancellationsFromTaskResourceUsageTracker( + taskCancellations, + searchShardTasks, + searchShardTaskHeapUsageTracker, + SearchShardTask.class ); } + return taskCancellations; } + private void addTaskCancellationsFromTaskResourceUsageTracker( + List taskCancellations, + List tasks, + Optional taskResourceUsageTracker, + Class type + ) { + taskResourceUsageTracker.ifPresent( + tracker -> taskCancellations.addAll( + tracker.getTaskCancellations(tasks, searchBackpressureStates.get(type)::incrementCancellationCount) + ) + ); + } /** * returns the taskTrackers for given type @@ -290,7 +309,6 @@ private TaskResourceUsageTrackers getTaskResourceUsageTrackersByType(Class mergeTaskCancellations(final List taskCancellations) { final Map uniqueTaskCancellations = new HashMap<>(); - for (TaskCancellation taskCancellation: taskCancellations) { + for (TaskCancellation taskCancellation : taskCancellations) { final long taskId = taskCancellation.getTask().getId(); - uniqueTaskCancellations.put(taskId, - uniqueTaskCancellations.getOrDefault(taskId, taskCancellation).merge(taskCancellation)); + uniqueTaskCancellations.put(taskId, uniqueTaskCancellations.getOrDefault(taskId, taskCancellation).merge(taskCancellation)); } return new ArrayList<>(uniqueTaskCancellations.values()); } + /** * Given a task, returns the type of the task */ diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java index 5bd8fc2f18025..5993ebd29e7cf 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java @@ -18,8 +18,8 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import java.io.IOException; import java.util.Map; @@ -67,7 +67,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + for (Map.Entry< + TaskResourceUsageTrackerType, + TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats> entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java index 37d62da635677..7453240de0577 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -68,7 +68,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startObject("resource_tracker_stats"); - for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { + for (Map.Entry< + TaskResourceUsageTrackerType, + TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats> entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index e26e3065aa393..331afaf7170c3 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -34,7 +34,30 @@ public class CpuUsageTracker extends TaskResourceUsageTrackers.TaskResourceUsage private final LongSupplier thresholdSupplier; public CpuUsageTracker(LongSupplier thresholdSupplier) { + this(thresholdSupplier, (task) -> { + long usage = task.getTotalResourceStats().getCpuTimeInNanos(); + long threshold = thresholdSupplier.getAsLong(); + + if (usage < threshold) { + return Optional.empty(); + } + + return Optional.of( + new TaskCancellation.Reason( + "cpu usage exceeded [" + + new TimeValue(usage, TimeUnit.NANOSECONDS) + + " >= " + + new TimeValue(threshold, TimeUnit.NANOSECONDS) + + "]", + 1 // TODO: fine-tune the cancellation score/weight + ) + ); + }); + } + + public CpuUsageTracker(LongSupplier thresholdSupplier, ResourceUsageBreachEvaluator resourceUsageBreachEvaluator) { this.thresholdSupplier = thresholdSupplier; + this.resourceUsageBreachEvaluator = resourceUsageBreachEvaluator; } @Override @@ -42,27 +65,6 @@ public String name() { return CPU_USAGE_TRACKER.getName(); } - @Override - public Optional checkAndMaybeGetCancellationReason(Task task) { - long usage = task.getTotalResourceStats().getCpuTimeInNanos(); - long threshold = thresholdSupplier.getAsLong(); - - if (usage < threshold) { - return Optional.empty(); - } - - return Optional.of( - new TaskCancellation.Reason( - "cpu usage exceeded [" - + new TimeValue(usage, TimeUnit.NANOSECONDS) - + " >= " - + new TimeValue(threshold, TimeUnit.NANOSECONDS) - + "]", - 1 // TODO: fine-tune the cancellation score/weight - ) - ); - } - @Override public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java index a9e7925f3a81e..dc21b037ade75 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTracker.java @@ -34,8 +34,35 @@ public class ElapsedTimeTracker extends TaskResourceUsageTrackers.TaskResourceUs private final LongSupplier timeNanosSupplier; public ElapsedTimeTracker(LongSupplier thresholdSupplier, LongSupplier timeNanosSupplier) { + this(thresholdSupplier, timeNanosSupplier, (Task task) -> { + long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); + long threshold = thresholdSupplier.getAsLong(); + + if (usage < threshold) { + return Optional.empty(); + } + + return Optional.of( + new TaskCancellation.Reason( + "elapsed time exceeded [" + + new TimeValue(usage, TimeUnit.NANOSECONDS) + + " >= " + + new TimeValue(threshold, TimeUnit.NANOSECONDS) + + "]", + 1 // TODO: fine-tune the cancellation score/weight + ) + ); + }); + } + + public ElapsedTimeTracker( + LongSupplier thresholdSupplier, + LongSupplier timeNanosSupplier, + ResourceUsageBreachEvaluator resourceUsageBreachEvaluator + ) { this.thresholdSupplier = thresholdSupplier; this.timeNanosSupplier = timeNanosSupplier; + this.resourceUsageBreachEvaluator = resourceUsageBreachEvaluator; } @Override @@ -43,27 +70,6 @@ public String name() { return ELAPSED_TIME_TRACKER.getName(); } - @Override - public Optional checkAndMaybeGetCancellationReason(Task task) { - long usage = timeNanosSupplier.getAsLong() - task.getStartTimeNanos(); - long threshold = thresholdSupplier.getAsLong(); - - if (usage < threshold) { - return Optional.empty(); - } - - return Optional.of( - new TaskCancellation.Reason( - "elapsed time exceeded [" - + new TimeValue(usage, TimeUnit.NANOSECONDS) - + " >= " - + new TimeValue(threshold, TimeUnit.NANOSECONDS) - + "]", - 1 // TODO: fine-tune the cancellation score/weight - ) - ); - } - @Override public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java index d966f40b9c78a..ad2cba125b041 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/HeapUsageTracker.java @@ -55,6 +55,43 @@ public HeapUsageTracker( this.heapPercentThresholdSupplier = heapPercentThresholdSupplier; this.movingAverageReference = new AtomicReference<>(new MovingAverage(heapMovingAverageWindowSize)); clusterSettings.addSettingsUpdateConsumer(windowSizeSetting, this::updateWindowSize); + setDefaultResourceUsageBreachEvaluator(); + } + + /** + * Had to refactor this method out of the constructor as we can't pass a lambda which references a member variable in constructor + * error: cannot reference movingAverageReference before supertype constructor has been called + */ + private void setDefaultResourceUsageBreachEvaluator() { + this.resourceUsageBreachEvaluator = (task) -> { + MovingAverage movingAverage = movingAverageReference.get(); + + // There haven't been enough measurements. + if (movingAverage.isReady() == false) { + return Optional.empty(); + } + + double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); + double averageUsage = movingAverage.getAverage(); + double variance = heapVarianceSupplier.getAsDouble(); + double allowedUsage = averageUsage * variance; + double threshold = heapPercentThresholdSupplier.getAsDouble() * HEAP_SIZE_BYTES; + + if (isHeapTrackingSupported() == false || currentUsage < threshold || currentUsage < allowedUsage) { + return Optional.empty(); + } + + return Optional.of( + new TaskCancellation.Reason( + "heap usage exceeded [" + + new ByteSizeValue((long) currentUsage) + + " >= " + + new ByteSizeValue((long) allowedUsage) + + "]", + (int) (currentUsage / averageUsage) // TODO: fine-tune the cancellation score/weight + ) + ); + }; } @Override @@ -67,33 +104,6 @@ public void update(Task task) { movingAverageReference.get().record(task.getTotalResourceStats().getMemoryInBytes()); } - @Override - public Optional checkAndMaybeGetCancellationReason(Task task) { - MovingAverage movingAverage = movingAverageReference.get(); - - // There haven't been enough measurements. - if (movingAverage.isReady() == false) { - return Optional.empty(); - } - - double currentUsage = task.getTotalResourceStats().getMemoryInBytes(); - double averageUsage = movingAverage.getAverage(); - double variance = heapVarianceSupplier.getAsDouble(); - double allowedUsage = averageUsage * variance; - double threshold = heapPercentThresholdSupplier.getAsDouble() * HEAP_SIZE_BYTES; - - if (isHeapTrackingSupported() == false || currentUsage < threshold || currentUsage < allowedUsage) { - return Optional.empty(); - } - - return Optional.of( - new TaskCancellation.Reason( - "heap usage exceeded [" + new ByteSizeValue((long) currentUsage) + " >= " + new ByteSizeValue((long) allowedUsage) + "]", - (int) (currentUsage / averageUsage) // TODO: fine-tune the cancellation score/weight - ) - ); - } - private void updateWindowSize(int heapMovingAverageWindowSize) { this.movingAverageReference.set(new MovingAverage(heapMovingAverageWindowSize)); } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java index ef22d8e5b7249..272d8e7b23b9d 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java @@ -28,7 +28,6 @@ public NodeDuressTrackers(final NodeDuressTracker heapDuressTracker, final NodeD this.cpuDuressTracker = cpuDuressTracker; } - /** * Method to check the heap duress * @return true if heap is in duress @@ -37,7 +36,6 @@ public boolean isHeapInDuress() { return heapDuressTracker.test(); } - /** * Method to check the CPU duress * @return true if cpu is in duress @@ -54,7 +52,6 @@ public boolean isNodeInDuress() { return isCPUInDuress() || isHeapInDuress(); } - /** * NodeDuressTracker is used to check if the node is in duress * @opensearch.internal @@ -75,8 +72,7 @@ public static class NodeDuressTracker { */ private final IntSupplier maxBreachAllowedSupplier; - public NodeDuressTracker(BooleanSupplier isNodeInDuress, - IntSupplier maxBreachAllowedSupplier) { + public NodeDuressTracker(BooleanSupplier isNodeInDuress, IntSupplier maxBreachAllowedSupplier) { this.isNodeInDuress = isNodeInDuress; this.maxBreachAllowedSupplier = maxBreachAllowedSupplier; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index 9750a31af3229..2d9becff610cb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -29,9 +29,11 @@ public class TaskResourceUsageTrackers { private TaskResourceUsageTracker cpuUsageTracker; private TaskResourceUsageTracker heapUsageTracker; private TaskResourceUsageTracker elapsedTimeTracker; + private final List all; - public TaskResourceUsageTrackers() { } - + public TaskResourceUsageTrackers() { + all = new ArrayList<>(3); + } /** * adds the cpuUsageTracker @@ -39,60 +41,57 @@ public TaskResourceUsageTrackers() { } */ public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { this.cpuUsageTracker = cpuUsageTracker; + all.add(cpuUsageTracker); } - /** * adds the heapUsageTracker * @param heapUsageTracker */ public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) { this.heapUsageTracker = heapUsageTracker; + all.add(heapUsageTracker); } - /** * adds the elapsedTimeTracker * @param elapsedTimeTracker */ public void addElapsedTimeTracker(final TaskResourceUsageTracker elapsedTimeTracker) { this.elapsedTimeTracker = elapsedTimeTracker; + all.add(elapsedTimeTracker); } - /** * getter for cpuUsageTracker * @return */ - public TaskResourceUsageTracker getCpuUsageTracker() { - return cpuUsageTracker; + public Optional getCpuUsageTracker() { + return Optional.ofNullable(cpuUsageTracker); } - /** * getter for heapUsageTacker * @return */ - public TaskResourceUsageTracker getHeapUsageTracker() { - return heapUsageTracker; + public Optional getHeapUsageTracker() { + return Optional.ofNullable(heapUsageTracker); } - /** * getter for elapsedTimeTracker * @return */ - public TaskResourceUsageTracker getElapsedTimeTracker() { - return elapsedTimeTracker; + public Optional getElapsedTimeTracker() { + return Optional.ofNullable(elapsedTimeTracker); } - /** * Method to access all available {@link TaskResourceUsageTracker} * @return */ public List all() { - return List.of(heapUsageTracker, cpuUsageTracker, elapsedTimeTracker); + return all; } /** @@ -104,6 +103,15 @@ public static abstract class TaskResourceUsageTracker { * Counts the number of cancellations made due to this tracker. */ private final AtomicLong cancellations = new AtomicLong(); + protected ResourceUsageBreachEvaluator resourceUsageBreachEvaluator; + + /** + * for test purposes only + * @param resourceUsageBreachEvaluator + */ + public void setResourceUsageBreachEvaluator(final ResourceUsageBreachEvaluator resourceUsageBreachEvaluator) { + this.resourceUsageBreachEvaluator = resourceUsageBreachEvaluator; + } public long incrementCancellations() { return cancellations.incrementAndGet(); @@ -126,14 +134,15 @@ public void update(Task task) {} /** * Returns the cancellation reason for the given task, if it's eligible for cancellation. */ - public abstract Optional checkAndMaybeGetCancellationReason(Task task); + public Optional checkAndMaybeGetCancellationReason(Task task) { + return resourceUsageBreachEvaluator.evaluate(task); + } /** * Returns the tracker's state for tasks as seen in the stats API. */ public abstract Stats stats(List activeTasks); - /** * Method to get taskCancellations due to this tracker for the given {@link CancellableTask} tasks * @param tasks @@ -141,9 +150,15 @@ public void update(Task task) {} * @return */ public List getTaskCancellations(List tasks, Runnable cancellationCallback) { - return tasks.stream().map( - task -> this.getTaskCancellation(task, cancellationCallback) - ).collect(Collectors.toList()); + return tasks.stream() + .map(task -> this.getTaskCancellation(task, cancellationCallback)) + .filter(TaskCancellation::isEligibleForCancellation) + .map(taskCancellation -> { + List onCancelCallbacks = new ArrayList<>(taskCancellation.getOnCancelCallbacks()); + onCancelCallbacks.add(this::incrementCancellations); + return new TaskCancellation(taskCancellation.getTask(), taskCancellation.getReasons(), onCancelCallbacks); + }) + .collect(Collectors.toList()); } private TaskCancellation getTaskCancellation(final CancellableTask task, final Runnable cancellationCallback) { @@ -158,5 +173,17 @@ private TaskCancellation getTaskCancellation(final CancellableTask task, final R * Represents the tracker's state as seen in the stats API. */ public interface Stats extends ToXContentObject, Writeable {} + + /** + * This interface carries the logic to decide whether a task should be cancelled or not + */ + public interface ResourceUsageBreachEvaluator { + /** + * evaluates whether the task is eligible for cancellation based on {@link TaskResourceUsageTracker} implementation + * @param task + * @return a {@link TaskCancellation.Reason} why this task should be cancelled otherwise empty + */ + public Optional evaluate(final Task task); + } } } diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java index 5f7daac42b973..872f5b79bb205 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellation.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellation.java @@ -42,13 +42,20 @@ public List getReasons() { return reasons; } + public List getOnCancelCallbacks() { + return onCancelCallbacks; + } + public String getReasonString() { return reasons.stream().map(Reason::getMessage).collect(Collectors.joining(", ")); } public TaskCancellation merge(final TaskCancellation other) { + if (other == this) { + return this; + } final List newReasons = new ArrayList<>(reasons); - reasons.addAll(other.getReasons()); + newReasons.addAll(other.getReasons()); final List newOnCancelCallbacks = new ArrayList<>(onCancelCallbacks); newOnCancelCallbacks.addAll(other.onCancelCallbacks); return new TaskCancellation(task, newReasons, newOnCancelCallbacks); diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 396316c9dec47..289565f45041a 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -23,9 +23,7 @@ import org.opensearch.search.backpressure.stats.SearchBackpressureStats; import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; -import org.opensearch.search.backpressure.trackers.NodeDuressTracker; import org.opensearch.search.backpressure.trackers.NodeDuressTrackers; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.tasks.CancellableTask; @@ -93,9 +91,12 @@ public void testIsNodeInDuress() { AtomicReference heapUsage = new AtomicReference<>(); NodeDuressTrackers.NodeDuressTracker cpuUsageTracker = new NodeDuressTrackers.NodeDuressTracker( () -> cpuUsage.get() >= 0.5, - () -> 3); - NodeDuressTrackers.NodeDuressTracker heapUsageTracker = new NodeDuressTrackers.NodeDuressTracker(() -> heapUsage.get() >= 0.5, - () -> 3); + () -> 3 + ); + NodeDuressTrackers.NodeDuressTracker heapUsageTracker = new NodeDuressTrackers.NodeDuressTracker( + () -> heapUsage.get() >= 0.5, + () -> 3 + ); SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -136,7 +137,9 @@ public void testIsNodeInDuress() { public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTrackers.TaskResourceUsageTracker.class); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock( + TaskResourceUsageTrackers.TaskResourceUsageTracker.class + ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); @@ -158,7 +161,7 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { for (int i = 0; i < 100; i++) { // service.onTaskCompleted(new SearchTask(1, "test", "test", () -> "Test", TaskId.EMPTY_TASK_ID, new HashMap<>())); - service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200)); + service.onTaskCompleted(createMockTaskWithResourceStats(SearchTask.class, 100, 200, i)); } assertEquals(100, service.getSearchBackpressureState(SearchTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); @@ -167,7 +170,9 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { public void testTrackerStateUpdateOnSearchShardTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTrackers.TaskResourceUsageTracker.class); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock( + TaskResourceUsageTrackers.TaskResourceUsageTracker.class + ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); @@ -188,9 +193,9 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { ); // Record task completions to update the tracker state. Tasks other than SearchTask & SearchShardTask are ignored. - service.onTaskCompleted(createMockTaskWithResourceStats(CancellableTask.class, 100, 200)); + service.onTaskCompleted(createMockTaskWithResourceStats(CancellableTask.class, 100, 200, 101)); for (int i = 0; i < 100; i++) { - service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200)); + service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, 200, i)); } assertEquals(100, service.getSearchBackpressureState(SearchShardTask.class).getCompletionCount()); verify(mockTaskResourceUsageTracker, times(100)).update(any()); @@ -201,12 +206,20 @@ public void testSearchTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker( - () -> true, () -> 3); + NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + (task) -> { + if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { + return Optional.empty(); + } + + return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); + } + ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); - taskResourceUsageTrackers.addHeapUsageTracker(mockTaskResourceUsageTracker); + taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 5.0); @@ -216,7 +229,7 @@ public void testSearchTaskInFlightCancellation() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - new NodeDuressTrackers(mockNodeDuressTracker, null), + new NodeDuressTrackers(new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3), mockNodeDuressTracker), taskResourceUsageTrackers, new TaskResourceUsageTrackers(), mockTaskManager @@ -237,9 +250,9 @@ public void testSearchTaskInFlightCancellation() { Map activeSearchTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { if (i % 3 == 0) { - activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes)); + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, taskHeapUsageBytes, i)); } else { - activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes)); + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, taskHeapUsageBytes, i)); } } doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); @@ -277,9 +290,25 @@ public void testSearchShardTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true); + NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3); + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3), + mockNodeDuressTracker + ); - TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker(); + TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + (task) -> { + if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { + return Optional.empty(); + } + + return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); + } + ); + TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); + taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); @@ -289,9 +318,9 @@ public void testSearchShardTaskInFlightCancellation() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - List.of(mockNodeDuressTracker), - Collections.emptyList(), - List.of(mockTaskResourceUsageTracker), + nodeDuressTrackers, + new TaskResourceUsageTrackers(), + taskResourceUsageTrackers, mockTaskManager ); @@ -310,9 +339,9 @@ public void testSearchShardTaskInFlightCancellation() { Map activeSearchShardTasks = new HashMap<>(); for (long i = 0; i < 75; i++) { if (i % 5 == 0) { - activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes)); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 500, taskHeapUsageBytes, i)); } else { - activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); + activeSearchShardTasks.put(i, createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes, i)); } } doReturn(activeSearchShardTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); @@ -330,7 +359,7 @@ public void testSearchShardTaskInFlightCancellation() { // Simulate task completion to replenish some tokens. // This will add 2 tokens (task count delta * cancellationRatio) to 'rateLimitPerTaskCompletion'. for (int i = 0; i < 20; i++) { - service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes)); + service.onTaskCompleted(createMockTaskWithResourceStats(SearchShardTask.class, 100, taskHeapUsageBytes, i)); } service.doRun(); verify(mockTaskManager, times(12)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); @@ -347,6 +376,96 @@ public void testSearchShardTaskInFlightCancellation() { assertEquals(expectedStats, actualStats); } + public void testNonCancellationOfHeapBasedTasksWhenHeapNotInDuress() { + TaskManager mockTaskManager = spy(taskManager); + TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + AtomicLong mockTime = new AtomicLong(0); + LongSupplier mockTimeNanosSupplier = mockTime::get; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3), + new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3) + ); + + // Creating heap and cpu usage trackers where heap tracker will always evaluate with reasons to cancel the + // tasks but heap based cancellation should not happen because heap is not in duress + TaskResourceUsageTrackers.TaskResourceUsageTracker heapUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + (task) -> Optional.of(new TaskCancellation.Reason("mem exceeded", 10)) + ); + TaskResourceUsageTrackers.TaskResourceUsageTracker cpuUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + (task) -> { + if (task.getTotalResourceStats().getCpuTimeInNanos() < 400) { + return Optional.empty(); + } + return Optional.of(new TaskCancellation.Reason("cpu time limit exceeded", 5)); + } + ); + + TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); + taskResourceUsageTrackers.addCpuUsageTracker(cpuUsageTracker); + taskResourceUsageTrackers.addHeapUsageTracker(heapUsageTracker); + + // Mocking 'settings' with predictable rate limiting thresholds. + SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); + + SearchBackpressureService service = new SearchBackpressureService( + settings, + mockTaskResourceTrackingService, + threadPool, + mockTimeNanosSupplier, + nodeDuressTrackers, + taskResourceUsageTrackers, + new TaskResourceUsageTrackers(), + mockTaskManager + ); + + service.doRun(); + service.doRun(); + + SearchTaskSettings searchTaskSettings = mock(SearchTaskSettings.class); + // setting the total heap percent threshold to minimum so that circuit does not break in SearchBackpressureService + when(searchTaskSettings.getTotalHeapPercentThreshold()).thenReturn(0.0); + when(settings.getSearchTaskSettings()).thenReturn(searchTaskSettings); + + // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). + Map activeSearchTasks = new HashMap<>(); + for (long i = 0; i < 75; i++) { + if (i % 5 == 0) { + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 500, 800, i)); + } else { + activeSearchTasks.put(i, createMockTaskWithResourceStats(SearchTask.class, 100, 800, i)); + } + } + doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); + + // this will trigger cancellation but these cancellation should only be cpu based + service.doRun(); + verify(mockTaskManager, times(5)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); + assertEquals(5, service.getSearchBackpressureState(SearchTask.class).getCancellationCount()); + assertEquals(1, service.getSearchBackpressureState(SearchTask.class).getLimitReachedCount()); + + SearchBackpressureStats expectedStats = new SearchBackpressureStats( + new SearchTaskStats( + 5, + 1, + 0, + Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + new MockStats(5), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + new MockStats(0) + ) + ), + new SearchShardTaskStats(0, 0, 0, Collections.emptyMap()), + SearchBackpressureMode.ENFORCED + ); + + SearchBackpressureStats actualStats = service.nodeStats(); + assertEquals(expectedStats, actualStats); + } + private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { return spy( new SearchBackpressureSettings( @@ -356,11 +475,14 @@ private SearchBackpressureSettings getBackpressureSettings(String mode, double r ); } - private TaskResourceUsageTrackers.TaskResourceUsageTracker getMockedTaskResourceUsageTracker() { + private TaskResourceUsageTrackers.TaskResourceUsageTracker getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType type, + TaskResourceUsageTrackers.TaskResourceUsageTracker.ResourceUsageBreachEvaluator evaluator + ) { return new TaskResourceUsageTrackers.TaskResourceUsageTracker() { @Override public String name() { - return TaskResourceUsageTrackerType.CPU_USAGE_TRACKER.getName(); + return type.getName(); } @Override @@ -368,11 +490,7 @@ public void update(Task task) {} @Override public Optional checkAndMaybeGetCancellationReason(Task task) { - if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { - return Optional.empty(); - } - - return Optional.of(new TaskCancellation.Reason("limits exceeded", 5)); + return evaluator.evaluate(task); } @Override diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java index d1b5c9cfa4944..e7057676ad89b 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java @@ -12,8 +12,8 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.test.AbstractWireSerializingTestCase; import java.util.Map; diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java index b2cd3a7567ab8..b187811d48247 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -12,8 +12,8 @@ import org.opensearch.search.backpressure.trackers.CpuUsageTracker; import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.test.AbstractWireSerializingTestCase; import java.util.Map; diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java index 8cdcbc7511bd2..0117b0ed71c27 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/CpuUsageTrackerTests.java @@ -33,7 +33,7 @@ public class CpuUsageTrackerTests extends OpenSearchTestCase { ); public void testSearchTaskEligibleForCancellation() { - Task task = createMockTaskWithResourceStats(SearchTask.class, 100000000, 200); + Task task = createMockTaskWithResourceStats(SearchTask.class, 100000000, 200, randomNonNegativeLong()); CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); @@ -43,7 +43,7 @@ public void testSearchTaskEligibleForCancellation() { } public void testSearchShardTaskEligibleForCancellation() { - Task task = createMockTaskWithResourceStats(SearchShardTask.class, 200000000, 200); + Task task = createMockTaskWithResourceStats(SearchShardTask.class, 200000000, 200, randomNonNegativeLong()); CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); @@ -53,7 +53,7 @@ public void testSearchShardTaskEligibleForCancellation() { } public void testNotEligibleForCancellation() { - Task task = createMockTaskWithResourceStats(SearchShardTask.class, 5000000, 200); + Task task = createMockTaskWithResourceStats(SearchShardTask.class, 5000000, 200, randomNonNegativeLong()); CpuUsageTracker tracker = new CpuUsageTracker(mockSettings.getSearchShardTaskSettings()::getCpuTimeNanosThreshold); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java index 921d01e7355a7..514f1b4785aa1 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/ElapsedTimeTrackerTests.java @@ -47,7 +47,7 @@ public void testSearchTaskEligibleForCancellation() { } public void testSearchShardTaskEligibleForCancellation() { - Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 0); + Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 0, randomNonNegativeLong()); ElapsedTimeTracker tracker = new ElapsedTimeTracker( mockSettings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, () -> 200000000 @@ -60,7 +60,7 @@ public void testSearchShardTaskEligibleForCancellation() { } public void testNotEligibleForCancellation() { - Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 150000000); + Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, 150000000, randomNonNegativeLong()); ElapsedTimeTracker tracker = new ElapsedTimeTracker( mockSettings.getSearchShardTaskSettings()::getElapsedTimeNanosThreshold, () -> 200000000 diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java index 3950d00b0c8b5..1c46305e9fda6 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/HeapUsageTrackerTests.java @@ -58,7 +58,7 @@ public void testSearchTaskEligibleForCancellation() { SearchTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); - Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50); + Task task = createMockTaskWithResourceStats(SearchTask.class, 1, 50, randomNonNegativeLong()); // Record enough observations to make the moving average 'ready'. for (int i = 0; i < HEAP_MOVING_AVERAGE_WINDOW_SIZE; i++) { @@ -66,7 +66,7 @@ public void testSearchTaskEligibleForCancellation() { } // Task that has heap usage >= heapBytesThreshold and (movingAverage * heapVariance). - task = createMockTaskWithResourceStats(SearchTask.class, 1, 300); + task = createMockTaskWithResourceStats(SearchTask.class, 1, 300, randomNonNegativeLong()); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); assertEquals(6, reason.get().getCancellationScore()); @@ -88,7 +88,7 @@ public void testSearchShardTaskEligibleForCancellation() { SearchShardTaskSettings.SETTING_HEAP_MOVING_AVERAGE_WINDOW_SIZE ) ); - Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50); + Task task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 50, randomNonNegativeLong()); // Record enough observations to make the moving average 'ready'. for (int i = 0; i < HEAP_MOVING_AVERAGE_WINDOW_SIZE; i++) { @@ -96,7 +96,7 @@ public void testSearchShardTaskEligibleForCancellation() { } // Task that has heap usage >= heapBytesThreshold and (movingAverage * heapVariance). - task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 200); + task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 200, randomNonNegativeLong()); Optional reason = tracker.checkAndMaybeGetCancellationReason(task); assertTrue(reason.isPresent()); assertEquals(4, reason.get().getCancellationScore()); @@ -122,7 +122,7 @@ public void testNotEligibleForCancellation() { ); // Task with heap usage < heapBytesThreshold. - task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 99); + task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 99, randomNonNegativeLong()); // Not enough observations. reason = tracker.checkAndMaybeGetCancellationReason(task); @@ -139,7 +139,12 @@ public void testNotEligibleForCancellation() { // Task with heap usage between heapBytesThreshold and (movingAverage * heapVariance) should not be cancelled. double allowedHeapUsage = 99.0 * 2.0; - task = createMockTaskWithResourceStats(SearchShardTask.class, 1, randomLongBetween(99, (long) allowedHeapUsage - 1)); + task = createMockTaskWithResourceStats( + SearchShardTask.class, + 1, + randomLongBetween(99, (long) allowedHeapUsage - 1), + randomNonNegativeLong() + ); reason = tracker.checkAndMaybeGetCancellationReason(task); assertFalse(reason.isPresent()); } @@ -148,12 +153,12 @@ public void testIsHeapUsageDominatedBySearch() { assumeTrue("Skip the test if the hardware doesn't support heap usage tracking", HeapUsageTracker.isHeapTrackingSupported()); // task with 1 byte of heap usage so that it does not breach the threshold - CancellableTask task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1); + CancellableTask task = createMockTaskWithResourceStats(SearchShardTask.class, 1, 1, randomNonNegativeLong()); assertFalse(HeapUsageTracker.isHeapUsageDominatedBySearch(List.of(task), 0.5)); long totalHeap = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); // task with heap usage of [totalHeap - 1] so that it breaches the threshold - task = createMockTaskWithResourceStats(SearchShardTask.class, 1, totalHeap - 1); + task = createMockTaskWithResourceStats(SearchShardTask.class, 1, totalHeap - 1, randomNonNegativeLong()); assertTrue(HeapUsageTracker.isHeapUsageDominatedBySearch(List.of(task), 0.5)); } } diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java index ba84f76cd1c9b..747bcd41209b9 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java @@ -16,8 +16,7 @@ public class NodeDuressTrackerTests extends OpenSearchTestCase { public void testNodeDuressTracker() { AtomicReference cpuUsage = new AtomicReference<>(0.0); - NodeDuressTrackers.NodeDuressTracker tracker = new NodeDuressTrackers.NodeDuressTracker(() -> cpuUsage.get() >= 0.5, - () -> 3); + NodeDuressTrackers.NodeDuressTracker tracker = new NodeDuressTrackers.NodeDuressTracker(() -> cpuUsage.get() >= 0.5, () -> 3); // Node not in duress. assertFalse(tracker.test()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java new file mode 100644 index 0000000000000..9b8581559b1e2 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.backpressure.trackers; + +import org.opensearch.test.OpenSearchTestCase; + +public class NodeDuressTrackersTests extends OpenSearchTestCase { + + public void testNodeInDuressWhenHeapInDuress() { + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3), + new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1) + ); + + assertFalse(nodeDuressTrackers.isNodeInDuress()); + assertFalse(nodeDuressTrackers.isNodeInDuress()); + + // for the third time it should be in duress + assertTrue(nodeDuressTrackers.isNodeInDuress()); + } + + public void testNodeInDuressWhenCPUInDuress() { + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1), + new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3) + ); + + assertFalse(nodeDuressTrackers.isNodeInDuress()); + assertFalse(nodeDuressTrackers.isNodeInDuress()); + + // for the third time it should be in duress + assertTrue(nodeDuressTrackers.isNodeInDuress()); + } + + public void testNodeInDuressWhenCPUAndHeapInDuress() { + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3), + new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3) + ); + + assertFalse(nodeDuressTrackers.isNodeInDuress()); + assertFalse(nodeDuressTrackers.isNodeInDuress()); + + // for the third time it should be in duress + assertTrue(nodeDuressTrackers.isNodeInDuress()); + } +} diff --git a/test/framework/src/main/java/org/opensearch/search/backpressure/SearchBackpressureTestHelpers.java b/test/framework/src/main/java/org/opensearch/search/backpressure/SearchBackpressureTestHelpers.java index af06b1688dca2..8f31f2a60ea86 100644 --- a/test/framework/src/main/java/org/opensearch/search/backpressure/SearchBackpressureTestHelpers.java +++ b/test/framework/src/main/java/org/opensearch/search/backpressure/SearchBackpressureTestHelpers.java @@ -21,19 +21,21 @@ public class SearchBackpressureTestHelpers extends OpenSearchTestCase { - public static T createMockTaskWithResourceStats(Class type, long cpuUsage, long heapUsage) { - return createMockTaskWithResourceStats(type, cpuUsage, heapUsage, 0); + public static T createMockTaskWithResourceStats(Class type, long cpuUsage, long heapUsage, long taskId) { + return createMockTaskWithResourceStats(type, cpuUsage, heapUsage, 0, taskId); } public static T createMockTaskWithResourceStats( Class type, long cpuUsage, long heapUsage, - long startTimeNanos + long startTimeNanos, + long taskId ) { T task = mock(type); when(task.getTotalResourceStats()).thenReturn(new TaskResourceUsage(cpuUsage, heapUsage)); when(task.getStartTimeNanos()).thenReturn(startTimeNanos); + when(task.getId()).thenReturn(randomNonNegativeLong()); AtomicBoolean isCancelled = new AtomicBoolean(false); doAnswer(invocation -> { From 326b84d701df92688a58f421de8a715ff6b194ac Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Tue, 30 Apr 2024 10:59:32 -0700 Subject: [PATCH 03/16] add enumMap instead of list for tracking taskResourceUsageTrackets Signed-off-by: Kaushal Kumar --- .../trackers/TaskResourceUsageTrackers.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index 2d9becff610cb..1d9f90b763626 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -15,6 +15,7 @@ import org.opensearch.tasks.TaskCancellation; import java.util.ArrayList; +import java.util.EnumMap; import java.util.List; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; @@ -29,10 +30,10 @@ public class TaskResourceUsageTrackers { private TaskResourceUsageTracker cpuUsageTracker; private TaskResourceUsageTracker heapUsageTracker; private TaskResourceUsageTracker elapsedTimeTracker; - private final List all; + private final EnumMap all; public TaskResourceUsageTrackers() { - all = new ArrayList<>(3); + all = new EnumMap<>(TaskResourceUsageTrackerType.class); } /** @@ -41,7 +42,7 @@ public TaskResourceUsageTrackers() { */ public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { this.cpuUsageTracker = cpuUsageTracker; - all.add(cpuUsageTracker); + all.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, cpuUsageTracker); } /** @@ -50,7 +51,7 @@ public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { */ public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) { this.heapUsageTracker = heapUsageTracker; - all.add(heapUsageTracker); + all.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, heapUsageTracker); } /** @@ -59,7 +60,7 @@ public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) */ public void addElapsedTimeTracker(final TaskResourceUsageTracker elapsedTimeTracker) { this.elapsedTimeTracker = elapsedTimeTracker; - all.add(elapsedTimeTracker); + all.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, elapsedTimeTracker); } /** @@ -91,7 +92,7 @@ public Optional getElapsedTimeTracker() { * @return */ public List all() { - return all; + return new ArrayList<>(all.values()); } /** From 9891c4e8bedb6753161f5dd7af57d1833c217002 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Tue, 30 Apr 2024 11:03:24 -0700 Subject: [PATCH 04/16] add nodeNotInDuress test for nodeDuressTrackers class Signed-off-by: Kaushal Kumar --- .../trackers/NodeDuressTrackersTests.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java index 9b8581559b1e2..a44ef446a32f5 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java @@ -12,6 +12,17 @@ public class NodeDuressTrackersTests extends OpenSearchTestCase { + public void testNodeNotInDuress() { + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( + new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2), + new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2) + ); + + assertFalse(nodeDuressTrackers.isNodeInDuress()); + assertFalse(nodeDuressTrackers.isNodeInDuress()); + assertFalse(nodeDuressTrackers.isNodeInDuress()); + } + public void testNodeInDuressWhenHeapInDuress() { NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3), From b2d0c41f1bafb6480ce4e4b78db568d9fb708e56 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Tue, 21 May 2024 11:39:52 -0700 Subject: [PATCH 05/16] address comments Signed-off-by: Kaushal Kumar --- .../org/opensearch/search/ResourceType.java | 32 +++++ .../SearchBackpressureService.java | 111 +++++++----------- .../trackers/NodeDuressTrackers.java | 29 ++--- .../TaskResourceUsageTrackerType.java | 7 +- .../trackers/TaskResourceUsageTrackers.java | 26 +--- .../SearchBackpressureServiceTests.java | 54 ++++++--- .../trackers/NodeDuressTrackersTests.java | 46 +++++--- 7 files changed, 160 insertions(+), 145 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/ResourceType.java diff --git a/server/src/main/java/org/opensearch/search/ResourceType.java b/server/src/main/java/org/opensearch/search/ResourceType.java new file mode 100644 index 0000000000000..a54ea08467ac2 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/ResourceType.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search; + +public enum ResourceType { + CPU("cpu"), + JVM("jvm"); + + private final String name; + ResourceType(String name) { + this.name = name; + } + + public static ResourceType fromName(String s) { + for (ResourceType resourceType: values()) { + if (resourceType.getName().equals(s)) { + return resourceType; + } + } + throw new IllegalArgumentException("Unknown resource type: [" + s + "]"); + } + + private String getName() { + return name; + } +} diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 2772f6343616b..8436b2cdb9c9a 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -18,6 +18,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.monitor.jvm.JvmStats; import org.opensearch.monitor.process.ProcessProbe; +import org.opensearch.search.ResourceType; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; @@ -29,6 +30,7 @@ import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.NodeDuressTrackers; +import org.opensearch.search.backpressure.trackers.NodeDuressTrackers.NodeDuressTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; import org.opensearch.tasks.CancellableTask; @@ -43,11 +45,13 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.EnumMap; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.function.DoubleSupplier; +import java.util.function.Function; import java.util.function.LongSupplier; import java.util.stream.Collectors; @@ -61,7 +65,14 @@ */ public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); - + private static final List> TRACKED_TASK_TYPES = List.of(SearchTask.class, SearchShardTask.class); + private static final Map> trackerApplyConditions = + Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, (nodeDuressTrackers) -> nodeDuressTrackers.isResourceInDuress(ResourceType.CPU), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, (nodeDuressTrackers) -> + isHeapTrackingSupported() && nodeDuressTrackers.isResourceInDuress(ResourceType.JVM), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, (nodeDuressTrackers) -> true + ); private volatile Scheduler.Cancellable scheduledFuture; private final SearchBackpressureSettings settings; @@ -86,16 +97,16 @@ public SearchBackpressureService( taskResourceTrackingService, threadPool, System::nanoTime, - new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker( + new NodeDuressTrackers(new EnumMap<>(ResourceType.class) {{ + put(ResourceType.CPU, new NodeDuressTracker( () -> ProcessProbe.getInstance().getProcessCpuPercent() / 100.0 >= settings.getNodeDuressSettings().getCpuThreshold(), () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() - ), - new NodeDuressTrackers.NodeDuressTracker( + )); + put(ResourceType.JVM, new NodeDuressTracker( () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold(), () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() - ) - ), + )); + }}), getTrackers( settings.getSearchTaskSettings()::getCpuTimeNanosThreshold, settings.getSearchTaskSettings()::getHeapVarianceThreshold, @@ -170,6 +181,11 @@ void doRun() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); + final Map, List> cancellableTasks = + Map.of( + SearchTask.class, searchTasks, + SearchShardTask.class, searchShardTasks + ); // Force-refresh usage stats of these tasks before making a cancellation decision. taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); @@ -177,11 +193,15 @@ void doRun() { List taskCancellations = new ArrayList<>(); - taskCancellations = addHeapBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); - - taskCancellations = addCPUBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); - - taskCancellations = addElapsedTimeBasedTaskCancellations(taskCancellations, searchTasks, searchShardTasks); + for (TaskResourceUsageTrackerType trackerType: TaskResourceUsageTrackerType.values()) { + if (shouldApply(trackerType)) { + addResourceTrackerBasedCancellations( + trackerType, + taskCancellations, + cancellableTasks + ); + } + } // Since these cancellations might be duplicate due to multiple trackers causing cancellation for same task // We need to merge them @@ -219,74 +239,31 @@ void doRun() { } } - private List addElapsedTimeBasedTaskCancellations( - List taskCancellations, - List searchTasks, - List searchShardTasks - ) { - final Optional searchTaskElapsedTimeTracker = - getTaskResourceUsageTrackersByType(SearchTask.class).getElapsedTimeTracker(); - final Optional searchShardTaskElapsedTimeTracker = - getTaskResourceUsageTrackersByType(SearchShardTask.class).getElapsedTimeTracker(); - - addTaskCancellationsFromTaskResourceUsageTracker(taskCancellations, searchTasks, searchTaskElapsedTimeTracker, SearchTask.class); - - addTaskCancellationsFromTaskResourceUsageTracker( - taskCancellations, - searchShardTasks, - searchShardTaskElapsedTimeTracker, - SearchShardTask.class - ); - - return taskCancellations; + private boolean shouldApply(TaskResourceUsageTrackerType trackerType) { + return trackerApplyConditions.get(trackerType).apply(nodeDuressTrackers); } - private List addCPUBasedTaskCancellations( + private List addResourceTrackerBasedCancellations( + TaskResourceUsageTrackerType type, List taskCancellations, - List searchTasks, - List searchShardTasks + Map, List> cancellableTasks ) { - if (nodeDuressTrackers.isCPUInDuress()) { - final Optional searchTaskCPUUsageTracker = - getTaskResourceUsageTrackersByType(SearchTask.class).getCpuUsageTracker(); - final Optional searchShardTaskCPUUsageTracker = - getTaskResourceUsageTrackersByType(SearchShardTask.class).getCpuUsageTracker(); - - addTaskCancellationsFromTaskResourceUsageTracker(taskCancellations, searchTasks, searchTaskCPUUsageTracker, SearchTask.class); + for (Class taskType: TRACKED_TASK_TYPES) { + final Optional taskResourceTracker = + getTaskResourceUsageTrackersByType(taskType).getTracker(type); addTaskCancellationsFromTaskResourceUsageTracker( taskCancellations, - searchShardTasks, - searchShardTaskCPUUsageTracker, - SearchShardTask.class + cancellableTasks.get(taskType), + taskResourceTracker, + taskType ); } - return taskCancellations; - } - - private List addHeapBasedTaskCancellations( - List taskCancellations, - List searchTasks, - List searchShardTasks - ) { - if (isHeapTrackingSupported() && nodeDuressTrackers.isHeapInDuress()) { - final Optional searchTaskHeapUsageTracker = - getTaskResourceUsageTrackersByType(SearchTask.class).getHeapUsageTracker(); - final Optional searchShardTaskHeapUsageTracker = - getTaskResourceUsageTrackersByType(SearchShardTask.class).getHeapUsageTracker(); - addTaskCancellationsFromTaskResourceUsageTracker(taskCancellations, searchTasks, searchTaskHeapUsageTracker, SearchTask.class); - - addTaskCancellationsFromTaskResourceUsageTracker( - taskCancellations, - searchShardTasks, - searchShardTaskHeapUsageTracker, - SearchShardTask.class - ); - } return taskCancellations; } + private void addTaskCancellationsFromTaskResourceUsageTracker( List taskCancellations, List tasks, diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java index 272d8e7b23b9d..fe7cf722b34eb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java @@ -9,7 +9,9 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.common.util.Streak; +import org.opensearch.search.ResourceType; +import java.util.EnumMap; import java.util.function.BooleanSupplier; import java.util.function.IntSupplier; @@ -19,29 +21,18 @@ * @opensearch.internal */ public class NodeDuressTrackers { + private final EnumMap duressTrackers; - private final NodeDuressTracker heapDuressTracker; - private final NodeDuressTracker cpuDuressTracker; - - public NodeDuressTrackers(final NodeDuressTracker heapDuressTracker, final NodeDuressTracker cpuDuressTracker) { - this.heapDuressTracker = heapDuressTracker; - this.cpuDuressTracker = cpuDuressTracker; - } - - /** - * Method to check the heap duress - * @return true if heap is in duress - */ - public boolean isHeapInDuress() { - return heapDuressTracker.test(); + public NodeDuressTrackers(EnumMap duressTrackers) { + this.duressTrackers = duressTrackers; } /** - * Method to check the CPU duress - * @return true if cpu is in duress + * Method to check the {@link ResourceType} in duress + * @return Boolean */ - public boolean isCPUInDuress() { - return cpuDuressTracker.test(); + public boolean isResourceInDuress(ResourceType resourceType) { + return duressTrackers.get(resourceType).test(); } /** @@ -49,7 +40,7 @@ public boolean isCPUInDuress() { * @return true if node is in duress because of either system resource */ public boolean isNodeInDuress() { - return isCPUInDuress() || isHeapInDuress(); + return isResourceInDuress(ResourceType.CPU) || isResourceInDuress(ResourceType.JVM); } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java index 2211d28ad30c0..0c2162f080517 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java @@ -8,6 +8,12 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.search.ResourceType; + +import java.util.function.Function; + +import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.isHeapTrackingSupported; + /** * Defines the type of TaskResourceUsageTracker. */ @@ -17,7 +23,6 @@ public enum TaskResourceUsageTrackerType { ELAPSED_TIME_TRACKER("elapsed_time_tracker"); private final String name; - TaskResourceUsageTrackerType(String name) { this.name = name; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index 1d9f90b763626..9a085048cbff8 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -27,9 +27,6 @@ * @opensearch.internal */ public class TaskResourceUsageTrackers { - private TaskResourceUsageTracker cpuUsageTracker; - private TaskResourceUsageTracker heapUsageTracker; - private TaskResourceUsageTracker elapsedTimeTracker; private final EnumMap all; public TaskResourceUsageTrackers() { @@ -41,7 +38,6 @@ public TaskResourceUsageTrackers() { * @param cpuUsageTracker */ public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { - this.cpuUsageTracker = cpuUsageTracker; all.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, cpuUsageTracker); } @@ -50,7 +46,6 @@ public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { * @param heapUsageTracker */ public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) { - this.heapUsageTracker = heapUsageTracker; all.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, heapUsageTracker); } @@ -59,7 +54,6 @@ public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) * @param elapsedTimeTracker */ public void addElapsedTimeTracker(final TaskResourceUsageTracker elapsedTimeTracker) { - this.elapsedTimeTracker = elapsedTimeTracker; all.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, elapsedTimeTracker); } @@ -67,24 +61,8 @@ public void addElapsedTimeTracker(final TaskResourceUsageTracker elapsedTimeTrac * getter for cpuUsageTracker * @return */ - public Optional getCpuUsageTracker() { - return Optional.ofNullable(cpuUsageTracker); - } - - /** - * getter for heapUsageTacker - * @return - */ - public Optional getHeapUsageTracker() { - return Optional.ofNullable(heapUsageTracker); - } - - /** - * getter for elapsedTimeTracker - * @return - */ - public Optional getElapsedTimeTracker() { - return Optional.ofNullable(elapsedTimeTracker); + public Optional getTracker(TaskResourceUsageTrackerType type) { + return Optional.ofNullable(all.get(type)); } /** diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 289565f45041a..56a7835ccb136 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -16,6 +16,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.search.ResourceType; import org.opensearch.search.backpressure.settings.SearchBackpressureMode; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; import org.opensearch.search.backpressure.settings.SearchShardTaskSettings; @@ -41,17 +42,14 @@ import org.junit.Before; import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; +import java.util.*; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.LongSupplier; +import static org.opensearch.search.ResourceType.CPU; +import static org.opensearch.search.ResourceType.JVM; import static org.opensearch.search.backpressure.SearchBackpressureTestHelpers.createMockTaskWithResourceStats; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyString; @@ -98,6 +96,12 @@ public void testIsNodeInDuress() { () -> 3 ); + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { + { + put(ResourceType.JVM, heapUsageTracker); + put(ResourceType.CPU, cpuUsageTracker); + }}; + SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) @@ -108,7 +112,7 @@ public void testIsNodeInDuress() { mockTaskResourceTrackingService, threadPool, System::nanoTime, - new NodeDuressTrackers(heapUsageTracker, cpuUsageTracker), + new NodeDuressTrackers(duressTrackers), new TaskResourceUsageTrackers(), new TaskResourceUsageTrackers(), taskManager @@ -153,7 +157,7 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - new NodeDuressTrackers(null, null), + new NodeDuressTrackers(new EnumMap<>(ResourceType.class)), taskResourceUsageTrackers, new TaskResourceUsageTrackers(), taskManager @@ -186,7 +190,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - new NodeDuressTrackers(null, null), + new NodeDuressTrackers(new EnumMap<>(ResourceType.class)), new TaskResourceUsageTrackers(), taskResourceUsageTrackers, taskManager @@ -224,12 +228,19 @@ public void testSearchTaskInFlightCancellation() { // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 5.0); + NodeDuressTrackers.NodeDuressTracker heapUsageTracker = new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3); + + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) {{ + put(JVM, heapUsageTracker); + put(CPU, mockNodeDuressTracker); + }}; + SearchBackpressureService service = new SearchBackpressureService( settings, mockTaskResourceTrackingService, threadPool, mockTimeNanosSupplier, - new NodeDuressTrackers(new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3), mockNodeDuressTracker), + new NodeDuressTrackers(duressTrackers), taskResourceUsageTrackers, new TaskResourceUsageTrackers(), mockTaskManager @@ -292,10 +303,13 @@ public void testSearchShardTaskInFlightCancellation() { LongSupplier mockTimeNanosSupplier = mockTime::get; NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3); - NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3), - mockNodeDuressTracker - ); + EnumMap duressTrackers = + new EnumMap<>(ResourceType.class) { + { + put(JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3)); + put(CPU, mockNodeDuressTracker); + }}; + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(duressTrackers); TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, @@ -382,10 +396,14 @@ public void testNonCancellationOfHeapBasedTasksWhenHeapNotInDuress() { AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3), - new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3) - ); + EnumMap duressTrackers = + new EnumMap<>(ResourceType.class) { + { + put(JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3)); + put(CPU, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); + }}; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(duressTrackers); // Creating heap and cpu usage trackers where heap tracker will always evaluate with reasons to cancel the // tasks but heap based cancellation should not happen because heap is not in duress diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java index a44ef446a32f5..b3d310789b54e 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java @@ -8,15 +8,20 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.search.ResourceType; import org.opensearch.test.OpenSearchTestCase; +import java.util.EnumMap; + public class NodeDuressTrackersTests extends OpenSearchTestCase { public void testNodeNotInDuress() { - NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2), - new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2) - ); + EnumMap map = new EnumMap<>(ResourceType.class) {{ + put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2)); + put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2)); + }}; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); assertFalse(nodeDuressTrackers.isNodeInDuress()); assertFalse(nodeDuressTrackers.isNodeInDuress()); @@ -24,10 +29,13 @@ public void testNodeNotInDuress() { } public void testNodeInDuressWhenHeapInDuress() { - NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3), - new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1) - ); + EnumMap map = new EnumMap<>(ResourceType.class) { + { + put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); + put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1)); + }}; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); assertFalse(nodeDuressTrackers.isNodeInDuress()); assertFalse(nodeDuressTrackers.isNodeInDuress()); @@ -37,10 +45,13 @@ public void testNodeInDuressWhenHeapInDuress() { } public void testNodeInDuressWhenCPUInDuress() { - NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1), - new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3) - ); + EnumMap map = new EnumMap<>(ResourceType.class) { + { + put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1)); + put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); + }}; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); assertFalse(nodeDuressTrackers.isNodeInDuress()); assertFalse(nodeDuressTrackers.isNodeInDuress()); @@ -50,10 +61,13 @@ public void testNodeInDuressWhenCPUInDuress() { } public void testNodeInDuressWhenCPUAndHeapInDuress() { - NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers( - new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3), - new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3) - ); + EnumMap map = new EnumMap<>(ResourceType.class) { + { + put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); + put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3)); + }}; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); assertFalse(nodeDuressTrackers.isNodeInDuress()); assertFalse(nodeDuressTrackers.isNodeInDuress()); From ea1be173b93616da0ec35330607575508f2fec72 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Tue, 21 May 2024 13:32:18 -0700 Subject: [PATCH 06/16] add entry in CHANGELOG Signed-off-by: Kaushal Kumar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fae8686d1e45d..281e93111dac6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Removed ### Fixed +- Fix bug in SBP cancellation logic ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13474)) - Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379)) - Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086)) From 69aaf3101d0dcdde68302dae3c3905d1660943d2 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Thu, 30 May 2024 16:11:04 -0700 Subject: [PATCH 07/16] address comments Signed-off-by: Kaushal Kumar --- .../SearchBackpressureService.java | 9 ++++--- .../backpressure/stats/SearchTaskStats.java | 8 +++--- .../trackers/NodeDuressTrackers.java | 7 +++++- .../TaskResourceUsageTrackerType.java | 6 ----- .../trackers/TaskResourceUsageTrackers.java | 25 +++++-------------- .../SearchBackpressureServiceTests.java | 12 ++++----- 6 files changed, 27 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 8436b2cdb9c9a..f04612d78bbde 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -359,21 +359,22 @@ public static TaskResourceUsageTrackers getTrackers( Setting windowSizeSetting ) { TaskResourceUsageTrackers trackers = new TaskResourceUsageTrackers(); - trackers.addCpuUsageTracker(new CpuUsageTracker(cpuThresholdSupplier)); + trackers.addTracker(new CpuUsageTracker(cpuThresholdSupplier), TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); if (isHeapTrackingSupported()) { - trackers.addHeapUsageTracker( + trackers.addTracker( new HeapUsageTracker( heapVarianceSupplier, heapPercentThresholdSupplier, heapMovingAverageWindowSize, clusterSettings, windowSizeSetting - ) + ), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER ); } else { logger.warn("heap size couldn't be determined"); } - trackers.addElapsedTimeTracker(new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime)); + trackers.addTracker(new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime), TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER); return trackers; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java index 7453240de0577..467d5d1be4314 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -20,6 +20,7 @@ import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats; import java.io.IOException; import java.util.Map; @@ -33,7 +34,7 @@ public class SearchTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final long completionCount; - private final Map resourceUsageTrackerStats; + private final Map resourceUsageTrackerStats; public SearchTaskStats( long cancellationCount, @@ -56,7 +57,7 @@ public SearchTaskStats(StreamInput in) throws IOException { this.completionCount = -1; } - MapBuilder builder = new MapBuilder<>(); + MapBuilder builder = new MapBuilder<>(); builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); @@ -69,8 +70,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject("resource_tracker_stats"); for (Map.Entry< - TaskResourceUsageTrackerType, - TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats> entry : resourceUsageTrackerStats.entrySet()) { + TaskResourceUsageTrackerType, Stats> entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java index fe7cf722b34eb..da31161e418e4 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java @@ -40,7 +40,12 @@ public boolean isResourceInDuress(ResourceType resourceType) { * @return true if node is in duress because of either system resource */ public boolean isNodeInDuress() { - return isResourceInDuress(ResourceType.CPU) || isResourceInDuress(ResourceType.JVM); + for (ResourceType resourceType: ResourceType.values()) { + if (isResourceInDuress(resourceType)) { + return true; + } + } + return false; } /** diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java index 0c2162f080517..55df3c65909cd 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackerType.java @@ -8,12 +8,6 @@ package org.opensearch.search.backpressure.trackers; -import org.opensearch.search.ResourceType; - -import java.util.function.Function; - -import static org.opensearch.search.backpressure.trackers.HeapUsageTracker.isHeapTrackingSupported; - /** * Defines the type of TaskResourceUsageTracker. */ diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index 9a085048cbff8..0afa9d95a4cad 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -33,32 +33,19 @@ public TaskResourceUsageTrackers() { all = new EnumMap<>(TaskResourceUsageTrackerType.class); } - /** - * adds the cpuUsageTracker - * @param cpuUsageTracker - */ - public void addCpuUsageTracker(final TaskResourceUsageTracker cpuUsageTracker) { - all.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, cpuUsageTracker); - } /** - * adds the heapUsageTracker - * @param heapUsageTracker + * adds the tracker for the TrackerType + * @param tracker + * @param trackerType */ - public void addHeapUsageTracker(final TaskResourceUsageTracker heapUsageTracker) { - all.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, heapUsageTracker); + public void addTracker(final TaskResourceUsageTracker tracker, final TaskResourceUsageTrackerType trackerType) { + all.put(trackerType, tracker); } - /** - * adds the elapsedTimeTracker - * @param elapsedTimeTracker - */ - public void addElapsedTimeTracker(final TaskResourceUsageTracker elapsedTimeTracker) { - all.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, elapsedTimeTracker); - } /** - * getter for cpuUsageTracker + * getter for tracker for a {@link TaskResourceUsageTrackerType} * @return */ public Optional getTracker(TaskResourceUsageTrackerType type) { diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 56a7835ccb136..8228bb2028bb4 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -145,7 +145,7 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceUsageTrackers.TaskResourceUsageTracker.class ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); - taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); + taskResourceUsageTrackers.addTracker(mockTaskResourceUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -178,7 +178,7 @@ public void testTrackerStateUpdateOnSearchShardTaskCompletion() { TaskResourceUsageTrackers.TaskResourceUsageTracker.class ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); - taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); + taskResourceUsageTrackers.addTracker(mockTaskResourceUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -223,7 +223,7 @@ public void testSearchTaskInFlightCancellation() { } ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); - taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); + taskResourceUsageTrackers.addTracker(mockTaskResourceUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 5.0); @@ -322,7 +322,7 @@ public void testSearchShardTaskInFlightCancellation() { } ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); - taskResourceUsageTrackers.addCpuUsageTracker(mockTaskResourceUsageTracker); + taskResourceUsageTrackers.addTracker(mockTaskResourceUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); @@ -422,8 +422,8 @@ public void testNonCancellationOfHeapBasedTasksWhenHeapNotInDuress() { ); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); - taskResourceUsageTrackers.addCpuUsageTracker(cpuUsageTracker); - taskResourceUsageTrackers.addHeapUsageTracker(heapUsageTracker); + taskResourceUsageTrackers.addTracker(cpuUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); + taskResourceUsageTrackers.addTracker(heapUsageTracker, TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER); // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); From f6065888aaada03dac0c9faf2e62148714bb3f48 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Mon, 3 Jun 2024 16:38:30 -0700 Subject: [PATCH 08/16] address comments Signed-off-by: Kaushal Kumar --- .../search/backpressure/stats/SearchTaskStats.java | 11 +++++------ .../backpressure/trackers/NodeDuressTrackers.java | 6 +++--- .../backpressure/stats/SearchShardTaskStatsTests.java | 4 ++-- .../backpressure/stats/SearchTaskStatsTests.java | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java index 467d5d1be4314..063ddcd961558 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -19,8 +19,7 @@ import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import java.io.IOException; import java.util.Map; @@ -34,13 +33,13 @@ public class SearchTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final long completionCount; - private final Map resourceUsageTrackerStats; + private final Map resourceUsageTrackerStats; public SearchTaskStats( long cancellationCount, long limitReachedCount, long completionCount, - Map resourceUsageTrackerStats + Map resourceUsageTrackerStats ) { this.cancellationCount = cancellationCount; this.limitReachedCount = limitReachedCount; @@ -57,7 +56,7 @@ public SearchTaskStats(StreamInput in) throws IOException { this.completionCount = -1; } - MapBuilder builder = new MapBuilder<>(); + MapBuilder builder = new MapBuilder<>(); builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); @@ -70,7 +69,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject("resource_tracker_stats"); for (Map.Entry< - TaskResourceUsageTrackerType, Stats> entry : resourceUsageTrackerStats.entrySet()) { + TaskResourceUsageTrackerType, TaskResourceUsageTracker.Stats> entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java index da31161e418e4..51d66bfbb9f34 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackers.java @@ -11,7 +11,7 @@ import org.opensearch.common.util.Streak; import org.opensearch.search.ResourceType; -import java.util.EnumMap; +import java.util.Map; import java.util.function.BooleanSupplier; import java.util.function.IntSupplier; @@ -21,9 +21,9 @@ * @opensearch.internal */ public class NodeDuressTrackers { - private final EnumMap duressTrackers; + private final Map duressTrackers; - public NodeDuressTrackers(EnumMap duressTrackers) { + public NodeDuressTrackers(Map duressTrackers) { this.duressTrackers = duressTrackers; } diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java index e7057676ad89b..45a44136d41f7 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchShardTaskStatsTests.java @@ -13,7 +13,7 @@ import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import org.opensearch.test.AbstractWireSerializingTestCase; import java.util.Map; @@ -30,7 +30,7 @@ protected SearchShardTaskStats createTestInstance() { } public static SearchShardTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( + Map resourceUsageTrackerStats = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, diff --git a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java index b187811d48247..3ac5cfd658fc3 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/stats/SearchTaskStatsTests.java @@ -13,7 +13,7 @@ import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import org.opensearch.test.AbstractWireSerializingTestCase; import java.util.Map; @@ -31,7 +31,7 @@ protected SearchTaskStats createTestInstance() { } public static SearchTaskStats randomInstance() { - Map resourceUsageTrackerStats = Map.of( + Map resourceUsageTrackerStats = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, new CpuUsageTracker.Stats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()), TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, From 99af1121fe32db2fbc6b4937f0c876675eadbb39 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Mon, 3 Jun 2024 16:58:56 -0700 Subject: [PATCH 09/16] remove wildcard import Signed-off-by: Kaushal Kumar --- .../backpressure/SearchBackpressureServiceTests.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 8228bb2028bb4..b093ac1d44506 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -42,7 +42,13 @@ import org.junit.Before; import java.io.IOException; -import java.util.*; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; From 90f38ac4f23a7c614b68466153c87352dadb6804 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Tue, 4 Jun 2024 12:49:31 -0700 Subject: [PATCH 10/16] streamline imports Signed-off-by: Kaushal Kumar --- .../org/opensearch/search/ResourceType.java | 3 +- .../SearchBackpressureService.java | 92 ++++++++++--------- .../stats/SearchShardTaskStats.java | 12 +-- .../backpressure/stats/SearchTaskStats.java | 3 +- .../trackers/CpuUsageTracker.java | 7 +- .../trackers/ElapsedTimeTracker.java | 7 +- .../trackers/HeapUsageTracker.java | 7 +- .../trackers/NodeDuressTrackers.java | 2 +- .../TaskResourceUsageTrackerType.java | 1 + .../trackers/TaskResourceUsageTrackers.java | 2 - .../SearchBackpressureServiceTests.java | 83 ++++++++--------- .../trackers/NodeDuressTrackerTests.java | 3 +- .../trackers/NodeDuressTrackersTests.java | 38 ++++---- .../tasks/TaskCancellationTests.java | 14 +-- 14 files changed, 142 insertions(+), 132 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/ResourceType.java b/server/src/main/java/org/opensearch/search/ResourceType.java index a54ea08467ac2..80b27a02d0056 100644 --- a/server/src/main/java/org/opensearch/search/ResourceType.java +++ b/server/src/main/java/org/opensearch/search/ResourceType.java @@ -13,12 +13,13 @@ public enum ResourceType { JVM("jvm"); private final String name; + ResourceType(String name) { this.name = name; } public static ResourceType fromName(String s) { - for (ResourceType resourceType: values()) { + for (ResourceType resourceType : values()) { if (resourceType.getName().equals(s)) { return resourceType; } diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index f04612d78bbde..cfe91a5198291 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -33,6 +33,7 @@ import org.opensearch.search.backpressure.trackers.NodeDuressTrackers.NodeDuressTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.SearchBackpressureTask; import org.opensearch.tasks.Task; @@ -65,14 +66,18 @@ */ public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); - private static final List> TRACKED_TASK_TYPES = List.of(SearchTask.class, SearchShardTask.class); - private static final Map> trackerApplyConditions = - Map.of( - TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, (nodeDuressTrackers) -> nodeDuressTrackers.isResourceInDuress(ResourceType.CPU), - TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, (nodeDuressTrackers) -> - isHeapTrackingSupported() && nodeDuressTrackers.isResourceInDuress(ResourceType.JVM), - TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, (nodeDuressTrackers) -> true - ); + private static final List> TRACKED_TASK_TYPES = List.of( + SearchTask.class, + SearchShardTask.class + ); + private static final Map> trackerApplyConditions = Map.of( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + (nodeDuressTrackers) -> nodeDuressTrackers.isResourceInDuress(ResourceType.CPU), + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + (nodeDuressTrackers) -> isHeapTrackingSupported() && nodeDuressTrackers.isResourceInDuress(ResourceType.JVM), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, + (nodeDuressTrackers) -> true + ); private volatile Scheduler.Cancellable scheduledFuture; private final SearchBackpressureSettings settings; @@ -92,21 +97,26 @@ public SearchBackpressureService( ThreadPool threadPool, TaskManager taskManager ) { - this( - settings, - taskResourceTrackingService, - threadPool, - System::nanoTime, - new NodeDuressTrackers(new EnumMap<>(ResourceType.class) {{ - put(ResourceType.CPU, new NodeDuressTracker( - () -> ProcessProbe.getInstance().getProcessCpuPercent() / 100.0 >= settings.getNodeDuressSettings().getCpuThreshold(), - () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() - )); - put(ResourceType.JVM, new NodeDuressTracker( - () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings().getHeapThreshold(), - () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() - )); - }}), + this(settings, taskResourceTrackingService, threadPool, System::nanoTime, new NodeDuressTrackers(new EnumMap<>(ResourceType.class) { + { + put( + ResourceType.CPU, + new NodeDuressTracker( + () -> ProcessProbe.getInstance().getProcessCpuPercent() / 100.0 >= settings.getNodeDuressSettings() + .getCpuThreshold(), + () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() + ) + ); + put( + ResourceType.JVM, + new NodeDuressTracker( + () -> JvmStats.jvmStats().getMem().getHeapUsedPercent() / 100.0 >= settings.getNodeDuressSettings() + .getHeapThreshold(), + () -> settings.getNodeDuressSettings().getNumSuccessiveBreaches() + ) + ); + } + }), getTrackers( settings.getSearchTaskSettings()::getCpuTimeNanosThreshold, settings.getSearchTaskSettings()::getHeapVarianceThreshold, @@ -181,11 +191,12 @@ void doRun() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); - final Map, List> cancellableTasks = - Map.of( - SearchTask.class, searchTasks, - SearchShardTask.class, searchShardTasks - ); + final Map, List> cancellableTasks = Map.of( + SearchTask.class, + searchTasks, + SearchShardTask.class, + searchShardTasks + ); // Force-refresh usage stats of these tasks before making a cancellation decision. taskResourceTrackingService.refreshResourceStats(searchTasks.toArray(new Task[0])); @@ -193,13 +204,9 @@ void doRun() { List taskCancellations = new ArrayList<>(); - for (TaskResourceUsageTrackerType trackerType: TaskResourceUsageTrackerType.values()) { + for (TaskResourceUsageTrackerType trackerType : TaskResourceUsageTrackerType.values()) { if (shouldApply(trackerType)) { - addResourceTrackerBasedCancellations( - trackerType, - taskCancellations, - cancellableTasks - ); + addResourceTrackerBasedCancellations(trackerType, taskCancellations, cancellableTasks); } } @@ -248,9 +255,8 @@ private List addResourceTrackerBasedCancellations( List taskCancellations, Map, List> cancellableTasks ) { - for (Class taskType: TRACKED_TASK_TYPES) { - final Optional taskResourceTracker = - getTaskResourceUsageTrackersByType(taskType).getTracker(type); + for (Class taskType : TRACKED_TASK_TYPES) { + final Optional taskResourceTracker = getTaskResourceUsageTrackersByType(taskType).getTracker(type); addTaskCancellationsFromTaskResourceUsageTracker( taskCancellations, @@ -263,11 +269,10 @@ private List addResourceTrackerBasedCancellations( return taskCancellations; } - private void addTaskCancellationsFromTaskResourceUsageTracker( List taskCancellations, List tasks, - Optional taskResourceUsageTracker, + Optional taskResourceUsageTracker, Class type ) { taskResourceUsageTracker.ifPresent( @@ -369,12 +374,15 @@ public static TaskResourceUsageTrackers getTrackers( clusterSettings, windowSizeSetting ), - TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER ); } else { logger.warn("heap size couldn't be determined"); } - trackers.addTracker(new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime), TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER); + trackers.addTracker( + new ElapsedTimeTracker(ElapsedTimeNanosSupplier, System::nanoTime), + TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER + ); return trackers; } @@ -396,7 +404,7 @@ public void onTaskCompleted(Task task) { List exceptions = new ArrayList<>(); TaskResourceUsageTrackers trackers = getTaskResourceUsageTrackersByType(taskType); - for (TaskResourceUsageTrackers.TaskResourceUsageTracker tracker : trackers.all()) { + for (TaskResourceUsageTracker tracker : trackers.all()) { try { tracker.update(task); } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java index 5993ebd29e7cf..be714271c8919 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchShardTaskStats.java @@ -19,7 +19,7 @@ import org.opensearch.search.backpressure.trackers.ElapsedTimeTracker; import org.opensearch.search.backpressure.trackers.HeapUsageTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import java.io.IOException; import java.util.Map; @@ -32,13 +32,13 @@ public class SearchShardTaskStats implements ToXContentObject, Writeable { private final long cancellationCount; private final long limitReachedCount; private final long completionCount; - private final Map resourceUsageTrackerStats; + private final Map resourceUsageTrackerStats; public SearchShardTaskStats( long cancellationCount, long limitReachedCount, long completionCount, - Map resourceUsageTrackerStats + Map resourceUsageTrackerStats ) { this.cancellationCount = cancellationCount; this.limitReachedCount = limitReachedCount; @@ -55,7 +55,7 @@ public SearchShardTaskStats(StreamInput in) throws IOException { completionCount = -1; } - MapBuilder builder = new MapBuilder<>(); + MapBuilder builder = new MapBuilder<>(); builder.put(TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, in.readOptionalWriteable(CpuUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, in.readOptionalWriteable(HeapUsageTracker.Stats::new)); builder.put(TaskResourceUsageTrackerType.ELAPSED_TIME_TRACKER, in.readOptionalWriteable(ElapsedTimeTracker.Stats::new)); @@ -67,9 +67,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startObject("resource_tracker_stats"); - for (Map.Entry< - TaskResourceUsageTrackerType, - TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats> entry : resourceUsageTrackerStats.entrySet()) { + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java index 063ddcd961558..0f5f409b15def 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java +++ b/server/src/main/java/org/opensearch/search/backpressure/stats/SearchTaskStats.java @@ -68,8 +68,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.startObject("resource_tracker_stats"); - for (Map.Entry< - TaskResourceUsageTrackerType, TaskResourceUsageTracker.Stats> entry : resourceUsageTrackerStats.entrySet()) { + for (Map.Entry entry : resourceUsageTrackerStats.entrySet()) { builder.field(entry.getKey().getName(), entry.getValue()); } builder.endObject(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java index 331afaf7170c3..a303b625f4b59 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/CpuUsageTracker.java @@ -12,6 +12,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -29,7 +30,7 @@ * * @opensearch.internal */ -public class CpuUsageTracker extends TaskResourceUsageTrackers.TaskResourceUsageTracker { +public class CpuUsageTracker extends TaskResourceUsageTracker { private final LongSupplier thresholdSupplier; @@ -66,7 +67,7 @@ public String name() { } @Override - public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getCpuTimeInNanos()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg); @@ -75,7 +76,7 @@ public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { long now = timeNanosSupplier.getAsLong(); long currentMax = activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> now - t.getStartTimeNanos()).average().orElse(0); @@ -81,7 +82,7 @@ public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List cancell } @Override - public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { long currentMax = activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).max().orElse(0); long currentAvg = (long) activeTasks.stream().mapToLong(t -> t.getTotalResourceStats().getMemoryInBytes()).average().orElse(0); return new Stats(getCancellations(), currentMax, currentAvg, (long) movingAverageReference.get().getAverage()); @@ -136,7 +137,7 @@ public TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats stats(List(TaskResourceUsageTrackerType.class); } - /** * adds the tracker for the TrackerType * @param tracker @@ -43,7 +42,6 @@ public void addTracker(final TaskResourceUsageTracker tracker, final TaskResourc all.put(trackerType, tracker); } - /** * getter for tracker for a {@link TaskResourceUsageTrackerType} * @return diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index b093ac1d44506..08c0e10c8e7d4 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -25,8 +25,10 @@ import org.opensearch.search.backpressure.stats.SearchShardTaskStats; import org.opensearch.search.backpressure.stats.SearchTaskStats; import org.opensearch.search.backpressure.trackers.NodeDuressTrackers; +import org.opensearch.search.backpressure.trackers.NodeDuressTrackers.NodeDuressTracker; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackerType; import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskCancellation; @@ -93,20 +95,15 @@ public void testIsNodeInDuress() { AtomicReference cpuUsage = new AtomicReference<>(); AtomicReference heapUsage = new AtomicReference<>(); - NodeDuressTrackers.NodeDuressTracker cpuUsageTracker = new NodeDuressTrackers.NodeDuressTracker( - () -> cpuUsage.get() >= 0.5, - () -> 3 - ); - NodeDuressTrackers.NodeDuressTracker heapUsageTracker = new NodeDuressTrackers.NodeDuressTracker( - () -> heapUsage.get() >= 0.5, - () -> 3 - ); + NodeDuressTracker cpuUsageTracker = new NodeDuressTracker(() -> cpuUsage.get() >= 0.5, () -> 3); + NodeDuressTracker heapUsageTracker = new NodeDuressTracker(() -> heapUsage.get() >= 0.5, () -> 3); - EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { { put(ResourceType.JVM, heapUsageTracker); put(ResourceType.CPU, cpuUsageTracker); - }}; + } + }; SearchBackpressureSettings settings = new SearchBackpressureSettings( Settings.EMPTY, @@ -147,9 +144,7 @@ public void testIsNodeInDuress() { public void testTrackerStateUpdateOnSearchTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock( - TaskResourceUsageTrackers.TaskResourceUsageTracker.class - ); + TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); taskResourceUsageTrackers.addTracker(mockTaskResourceUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); @@ -180,9 +175,7 @@ public void testTrackerStateUpdateOnSearchTaskCompletion() { public void testTrackerStateUpdateOnSearchShardTaskCompletion() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); LongSupplier mockTimeNanosSupplier = () -> TimeUnit.SECONDS.toNanos(1234); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = mock( - TaskResourceUsageTrackers.TaskResourceUsageTracker.class - ); + TaskResourceUsageTracker mockTaskResourceUsageTracker = mock(TaskResourceUsageTracker.class); TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); taskResourceUsageTrackers.addTracker(mockTaskResourceUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); @@ -216,9 +209,9 @@ public void testSearchTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3); + NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true, () -> 3); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, (task) -> { if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { @@ -234,12 +227,14 @@ public void testSearchTaskInFlightCancellation() { // Mocking 'settings' with predictable rate limiting thresholds. SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 5.0); - NodeDuressTrackers.NodeDuressTracker heapUsageTracker = new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3); + NodeDuressTracker heapUsageTracker = new NodeDuressTracker(() -> false, () -> 3); - EnumMap duressTrackers = new EnumMap<>(ResourceType.class) {{ - put(JVM, heapUsageTracker); - put(CPU, mockNodeDuressTracker); - }}; + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { + { + put(JVM, heapUsageTracker); + put(CPU, mockNodeDuressTracker); + } + }; SearchBackpressureService service = new SearchBackpressureService( settings, @@ -307,17 +302,17 @@ public void testSearchShardTaskInFlightCancellation() { TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - NodeDuressTrackers.NodeDuressTracker mockNodeDuressTracker = new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3); - - EnumMap duressTrackers = - new EnumMap<>(ResourceType.class) { - { - put(JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3)); - put(CPU, mockNodeDuressTracker); - }}; + NodeDuressTracker mockNodeDuressTracker = new NodeDuressTracker(() -> true, () -> 3); + + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { + { + put(JVM, new NodeDuressTracker(() -> false, () -> 3)); + put(CPU, mockNodeDuressTracker); + } + }; NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(duressTrackers); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTracker mockTaskResourceUsageTracker = getMockedTaskResourceUsageTracker( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, (task) -> { if (task.getTotalResourceStats().getCpuTimeInNanos() < 300) { @@ -402,22 +397,22 @@ public void testNonCancellationOfHeapBasedTasksWhenHeapNotInDuress() { AtomicLong mockTime = new AtomicLong(0); LongSupplier mockTimeNanosSupplier = mockTime::get; - EnumMap duressTrackers = - new EnumMap<>(ResourceType.class) { - { - put(JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3)); - put(CPU, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); - }}; + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { + { + put(JVM, new NodeDuressTracker(() -> false, () -> 3)); + put(CPU, new NodeDuressTracker(() -> true, () -> 3)); + } + }; NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(duressTrackers); // Creating heap and cpu usage trackers where heap tracker will always evaluate with reasons to cancel the // tasks but heap based cancellation should not happen because heap is not in duress - TaskResourceUsageTrackers.TaskResourceUsageTracker heapUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTracker heapUsageTracker = getMockedTaskResourceUsageTracker( TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, (task) -> Optional.of(new TaskCancellation.Reason("mem exceeded", 10)) ); - TaskResourceUsageTrackers.TaskResourceUsageTracker cpuUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTracker cpuUsageTracker = getMockedTaskResourceUsageTracker( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, (task) -> { if (task.getTotalResourceStats().getCpuTimeInNanos() < 400) { @@ -499,11 +494,11 @@ private SearchBackpressureSettings getBackpressureSettings(String mode, double r ); } - private TaskResourceUsageTrackers.TaskResourceUsageTracker getMockedTaskResourceUsageTracker( + private TaskResourceUsageTracker getMockedTaskResourceUsageTracker( TaskResourceUsageTrackerType type, - TaskResourceUsageTrackers.TaskResourceUsageTracker.ResourceUsageBreachEvaluator evaluator + TaskResourceUsageTracker.ResourceUsageBreachEvaluator evaluator ) { - return new TaskResourceUsageTrackers.TaskResourceUsageTracker() { + return new TaskResourceUsageTracker() { @Override public String name() { return type.getName(); @@ -524,7 +519,7 @@ public Stats stats(List tasks) { }; } - private static class MockStats implements TaskResourceUsageTrackers.TaskResourceUsageTracker.Stats { + private static class MockStats implements TaskResourceUsageTracker.Stats { private final long cancellationCount; public MockStats(long cancellationCount) { diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java index 747bcd41209b9..32aca6ac3230e 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackerTests.java @@ -8,6 +8,7 @@ package org.opensearch.search.backpressure.trackers; +import org.opensearch.search.backpressure.trackers.NodeDuressTrackers.NodeDuressTracker; import org.opensearch.test.OpenSearchTestCase; import java.util.concurrent.atomic.AtomicReference; @@ -16,7 +17,7 @@ public class NodeDuressTrackerTests extends OpenSearchTestCase { public void testNodeDuressTracker() { AtomicReference cpuUsage = new AtomicReference<>(0.0); - NodeDuressTrackers.NodeDuressTracker tracker = new NodeDuressTrackers.NodeDuressTracker(() -> cpuUsage.get() >= 0.5, () -> 3); + NodeDuressTracker tracker = new NodeDuressTracker(() -> cpuUsage.get() >= 0.5, () -> 3); // Node not in duress. assertFalse(tracker.test()); diff --git a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java index b3d310789b54e..2db251ee461db 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/trackers/NodeDuressTrackersTests.java @@ -9,6 +9,7 @@ package org.opensearch.search.backpressure.trackers; import org.opensearch.search.ResourceType; +import org.opensearch.search.backpressure.trackers.NodeDuressTrackers.NodeDuressTracker; import org.opensearch.test.OpenSearchTestCase; import java.util.EnumMap; @@ -16,10 +17,12 @@ public class NodeDuressTrackersTests extends OpenSearchTestCase { public void testNodeNotInDuress() { - EnumMap map = new EnumMap<>(ResourceType.class) {{ - put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2)); - put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 2)); - }}; + EnumMap map = new EnumMap<>(ResourceType.class) { + { + put(ResourceType.JVM, new NodeDuressTracker(() -> false, () -> 2)); + put(ResourceType.CPU, new NodeDuressTracker(() -> false, () -> 2)); + } + }; NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); @@ -29,11 +32,12 @@ public void testNodeNotInDuress() { } public void testNodeInDuressWhenHeapInDuress() { - EnumMap map = new EnumMap<>(ResourceType.class) { + EnumMap map = new EnumMap<>(ResourceType.class) { { - put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); - put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1)); - }}; + put(ResourceType.JVM, new NodeDuressTracker(() -> true, () -> 3)); + put(ResourceType.CPU, new NodeDuressTracker(() -> false, () -> 1)); + } + }; NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); @@ -45,11 +49,12 @@ public void testNodeInDuressWhenHeapInDuress() { } public void testNodeInDuressWhenCPUInDuress() { - EnumMap map = new EnumMap<>(ResourceType.class) { + EnumMap map = new EnumMap<>(ResourceType.class) { { - put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 1)); - put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); - }}; + put(ResourceType.JVM, new NodeDuressTracker(() -> false, () -> 1)); + put(ResourceType.CPU, new NodeDuressTracker(() -> true, () -> 3)); + } + }; NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); @@ -61,11 +66,12 @@ public void testNodeInDuressWhenCPUInDuress() { } public void testNodeInDuressWhenCPUAndHeapInDuress() { - EnumMap map = new EnumMap<>(ResourceType.class) { + EnumMap map = new EnumMap<>(ResourceType.class) { { - put(ResourceType.JVM, new NodeDuressTrackers.NodeDuressTracker(() -> true, () -> 3)); - put(ResourceType.CPU, new NodeDuressTrackers.NodeDuressTracker(() -> false, () -> 3)); - }}; + put(ResourceType.JVM, new NodeDuressTracker(() -> true, () -> 3)); + put(ResourceType.CPU, new NodeDuressTracker(() -> false, () -> 3)); + } + }; NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(map); diff --git a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java index d501148c17a76..f08c12ea258ca 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskCancellationTests.java @@ -9,7 +9,7 @@ package org.opensearch.tasks; import org.opensearch.action.search.SearchShardTask; -import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers; +import org.opensearch.search.backpressure.trackers.TaskResourceUsageTrackers.TaskResourceUsageTracker; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; @@ -22,9 +22,9 @@ public class TaskCancellationTests extends OpenSearchTestCase { public void testTaskCancellation() { SearchShardTask mockTask = new SearchShardTask(123L, "", "", "", null, Collections.emptyMap()); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTracker1 = createMockTaskResourceUsageTracker("mock_tracker_1"); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTracker2 = createMockTaskResourceUsageTracker("mock_tracker_2"); - TaskResourceUsageTrackers.TaskResourceUsageTracker mockTracker3 = createMockTaskResourceUsageTracker("mock_tracker_3"); + TaskResourceUsageTracker mockTracker1 = createMockTaskResourceUsageTracker("mock_tracker_1"); + TaskResourceUsageTracker mockTracker2 = createMockTaskResourceUsageTracker("mock_tracker_2"); + TaskResourceUsageTracker mockTracker3 = createMockTaskResourceUsageTracker("mock_tracker_3"); List reasons = new ArrayList<>(); List callbacks = List.of(mockTracker1::incrementCancellations, mockTracker2::incrementCancellations); @@ -53,8 +53,8 @@ public void testTaskCancellation() { assertEquals(0, mockTracker3.getCancellations()); } - private static TaskResourceUsageTrackers.TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { - return new TaskResourceUsageTrackers.TaskResourceUsageTracker() { + private static TaskResourceUsageTracker createMockTaskResourceUsageTracker(String name) { + return new TaskResourceUsageTracker() { @Override public String name() { return name; @@ -69,7 +69,7 @@ public Optional checkAndMaybeGetCancellationReason(Task } @Override - public Stats stats(List activeTasks) { + public TaskResourceUsageTracker.Stats stats(List activeTasks) { return null; } }; From b8dba28484e2f738f9568371c98dae44c588333a Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Wed, 12 Jun 2024 08:21:44 -0700 Subject: [PATCH 11/16] address comments Signed-off-by: Kaushal Kumar --- .../SearchBackpressureService.java | 53 +++++++++---------- .../trackers/TaskResourceUsageTrackers.java | 11 ++-- 2 files changed, 28 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index cfe91a5198291..fe1cf4427b32a 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -46,6 +46,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.EnumMap; import java.util.HashMap; import java.util.List; @@ -66,10 +67,6 @@ */ public class SearchBackpressureService extends AbstractLifecycleComponent implements TaskCompletionListener { private static final Logger logger = LogManager.getLogger(SearchBackpressureService.class); - private static final List> TRACKED_TASK_TYPES = List.of( - SearchTask.class, - SearchShardTask.class - ); private static final Map> trackerApplyConditions = Map.of( TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, (nodeDuressTrackers) -> nodeDuressTrackers.isResourceInDuress(ResourceType.CPU), @@ -139,7 +136,7 @@ public SearchBackpressureService( ); } - public SearchBackpressureService( + SearchBackpressureService( SearchBackpressureSettings settings, TaskResourceTrackingService taskResourceTrackingService, ThreadPool threadPool, @@ -191,11 +188,20 @@ void doRun() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); + + boolean isHeapUsageDominatedBySearchTasks = HeapUsageTracker.isHeapUsageDominatedBySearch( + searchTasks, + getSettings().getSearchTaskSettings().getTotalHeapPercentThreshold() + ); + boolean isHeapUsageDominatedBySearchShardTasks = HeapUsageTracker.isHeapUsageDominatedBySearch( + searchTasks, + getSettings().getSearchShardTaskSettings().getTotalHeapPercentThreshold() + ); final Map, List> cancellableTasks = Map.of( SearchTask.class, - searchTasks, + isHeapUsageDominatedBySearchTasks ? searchTasks : Collections.emptyList(), SearchShardTask.class, - searchShardTasks + isHeapUsageDominatedBySearchShardTasks ? searchShardTasks : Collections.emptyList() ); // Force-refresh usage stats of these tasks before making a cancellation decision. @@ -255,33 +261,24 @@ private List addResourceTrackerBasedCancellations( List taskCancellations, Map, List> cancellableTasks ) { - for (Class taskType : TRACKED_TASK_TYPES) { - final Optional taskResourceTracker = getTaskResourceUsageTrackersByType(taskType).getTracker(type); - - addTaskCancellationsFromTaskResourceUsageTracker( - taskCancellations, - cancellableTasks.get(taskType), - taskResourceTracker, - taskType + for (Map.Entry, TaskResourceUsageTrackers> taskResourceUsageTrackers : taskTrackers + .entrySet()) { + final Optional taskResourceUsageTracker = taskResourceUsageTrackers.getValue().getTracker(type); + final Class taskType = taskResourceUsageTrackers.getKey(); + + taskResourceUsageTracker.ifPresent( + tracker -> taskCancellations.addAll( + tracker.getTaskCancellations( + cancellableTasks.get(taskType), + searchBackpressureStates.get(taskType)::incrementCancellationCount + ) + ) ); } return taskCancellations; } - private void addTaskCancellationsFromTaskResourceUsageTracker( - List taskCancellations, - List tasks, - Optional taskResourceUsageTracker, - Class type - ) { - taskResourceUsageTracker.ifPresent( - tracker -> taskCancellations.addAll( - tracker.getTaskCancellations(tasks, searchBackpressureStates.get(type)::incrementCancellationCount) - ) - ); - } - /** * returns the taskTrackers for given type * @param type diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index 399ee3b6a7187..f4bfdd29b0cf7 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -115,22 +115,17 @@ public Optional checkAndMaybeGetCancellationReason(Task */ public List getTaskCancellations(List tasks, Runnable cancellationCallback) { return tasks.stream() - .map(task -> this.getTaskCancellation(task, cancellationCallback)) + .map(task -> this.getTaskCancellation(task, List.of(cancellationCallback, this::incrementCancellations))) .filter(TaskCancellation::isEligibleForCancellation) - .map(taskCancellation -> { - List onCancelCallbacks = new ArrayList<>(taskCancellation.getOnCancelCallbacks()); - onCancelCallbacks.add(this::incrementCancellations); - return new TaskCancellation(taskCancellation.getTask(), taskCancellation.getReasons(), onCancelCallbacks); - }) .collect(Collectors.toList()); } - private TaskCancellation getTaskCancellation(final CancellableTask task, final Runnable cancellationCallback) { + private TaskCancellation getTaskCancellation(final CancellableTask task, final List cancellationCallback) { Optional reason = checkAndMaybeGetCancellationReason(task); List reasons = new ArrayList<>(); reason.ifPresent(reasons::add); - return new TaskCancellation(task, reasons, List.of(cancellationCallback)); + return new TaskCancellation(task, reasons, cancellationCallback); } /** From 0c7043deca6d4423377971ae9bf8adb2e5b6e16f Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Mon, 17 Jun 2024 12:32:12 -0700 Subject: [PATCH 12/16] add additional test case to test the circuit breaker for SBP logic Signed-off-by: Kaushal Kumar --- .../SearchBackpressureService.java | 53 +++++++----- .../trackers/TaskResourceUsageTrackers.java | 5 +- .../SearchBackpressureServiceTests.java | 83 +++++++++++++++++++ 3 files changed, 117 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index fe1cf4427b32a..12152cb1b5e43 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -189,12 +189,12 @@ void doRun() { List searchTasks = getTaskByType(SearchTask.class); List searchShardTasks = getTaskByType(SearchShardTask.class); - boolean isHeapUsageDominatedBySearchTasks = HeapUsageTracker.isHeapUsageDominatedBySearch( + boolean isHeapUsageDominatedBySearchTasks = isHeapUsageDominatedBySearch( searchTasks, getSettings().getSearchTaskSettings().getTotalHeapPercentThreshold() ); - boolean isHeapUsageDominatedBySearchShardTasks = HeapUsageTracker.isHeapUsageDominatedBySearch( - searchTasks, + boolean isHeapUsageDominatedBySearchShardTasks = isHeapUsageDominatedBySearch( + searchShardTasks, getSettings().getSearchShardTaskSettings().getTotalHeapPercentThreshold() ); final Map, List> cancellableTasks = Map.of( @@ -219,6 +219,7 @@ void doRun() { // Since these cancellations might be duplicate due to multiple trackers causing cancellation for same task // We need to merge them taskCancellations = mergeTaskCancellations(taskCancellations).stream() + .map(this::addSBPStateUpdateCallback) .filter(TaskCancellation::isEligibleForCancellation) .collect(Collectors.toList()); @@ -252,6 +253,28 @@ void doRun() { } } + /** + * Had to define this method to help mock this static method to test the scenario where SearchTraffic should not be + * penalised when not breaching the threshold + * @param searchTasks + * @param threshold + * @return + */ + boolean isHeapUsageDominatedBySearch(List searchTasks, double threshold) { + return HeapUsageTracker.isHeapUsageDominatedBySearch(searchTasks, threshold); + } + + private TaskCancellation addSBPStateUpdateCallback(TaskCancellation taskCancellation) { + CancellableTask task = taskCancellation.getTask(); + Runnable toAddCancellationCallbackForSBPState = searchBackpressureStates.get(SearchShardTask.class)::incrementCancellationCount; + if (task instanceof SearchTask) { + toAddCancellationCallbackForSBPState = searchBackpressureStates.get(SearchTask.class)::incrementCancellationCount; + } + List newOnCancelCallbacks = new ArrayList<>(taskCancellation.getOnCancelCallbacks()); + newOnCancelCallbacks.add(toAddCancellationCallbackForSBPState); + return new TaskCancellation(task, taskCancellation.getReasons(), newOnCancelCallbacks); + } + private boolean shouldApply(TaskResourceUsageTrackerType trackerType) { return trackerApplyConditions.get(trackerType).apply(nodeDuressTrackers); } @@ -267,27 +290,13 @@ private List addResourceTrackerBasedCancellations( final Class taskType = taskResourceUsageTrackers.getKey(); taskResourceUsageTracker.ifPresent( - tracker -> taskCancellations.addAll( - tracker.getTaskCancellations( - cancellableTasks.get(taskType), - searchBackpressureStates.get(taskType)::incrementCancellationCount - ) - ) + tracker -> taskCancellations.addAll(tracker.getTaskCancellations(cancellableTasks.get(taskType))) ); } return taskCancellations; } - /** - * returns the taskTrackers for given type - * @param type - * @return - */ - private TaskResourceUsageTrackers getTaskResourceUsageTrackersByType(Class type) { - return taskTrackers.get(type); - } - /** * Method to reduce the taskCancellations into unique bunch * @param taskCancellations @@ -400,7 +409,7 @@ public void onTaskCompleted(Task task) { } List exceptions = new ArrayList<>(); - TaskResourceUsageTrackers trackers = getTaskResourceUsageTrackersByType(taskType); + TaskResourceUsageTrackers trackers = taskTrackers.get(taskType); for (TaskResourceUsageTracker tracker : trackers.all()) { try { tracker.update(task); @@ -440,7 +449,8 @@ public SearchBackpressureStats nodeStats() { searchBackpressureStates.get(SearchTask.class).getCancellationCount(), searchBackpressureStates.get(SearchTask.class).getLimitReachedCount(), searchBackpressureStates.get(SearchTask.class).getCompletionCount(), - getTaskResourceUsageTrackersByType(SearchTask.class).all() + taskTrackers.get(SearchTask.class) + .all() .stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchTasks))) ); @@ -449,7 +459,8 @@ public SearchBackpressureStats nodeStats() { searchBackpressureStates.get(SearchShardTask.class).getCancellationCount(), searchBackpressureStates.get(SearchShardTask.class).getLimitReachedCount(), searchBackpressureStates.get(SearchShardTask.class).getCompletionCount(), - getTaskResourceUsageTrackersByType(SearchShardTask.class).all() + taskTrackers.get(SearchShardTask.class) + .all() .stream() .collect(Collectors.toUnmodifiableMap(t -> TaskResourceUsageTrackerType.fromName(t.name()), t -> t.stats(searchShardTasks))) ); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index f4bfdd29b0cf7..d7ff47b4c6049 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -110,12 +110,11 @@ public Optional checkAndMaybeGetCancellationReason(Task /** * Method to get taskCancellations due to this tracker for the given {@link CancellableTask} tasks * @param tasks - * @param cancellationCallback * @return */ - public List getTaskCancellations(List tasks, Runnable cancellationCallback) { + public List getTaskCancellations(List tasks) { return tasks.stream() - .map(task -> this.getTaskCancellation(task, List.of(cancellationCallback, this::incrementCancellations))) + .map(task -> this.getTaskCancellation(task, List.of(this::incrementCancellations))) .filter(TaskCancellation::isEligibleForCancellation) .collect(Collectors.toList()); } diff --git a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java index 08c0e10c8e7d4..43df482fcc2ae 100644 --- a/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java +++ b/server/src/test/java/org/opensearch/search/backpressure/SearchBackpressureServiceTests.java @@ -62,6 +62,8 @@ import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyDouble; +import static org.mockito.Mockito.anyList; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -485,6 +487,87 @@ public void testNonCancellationOfHeapBasedTasksWhenHeapNotInDuress() { assertEquals(expectedStats, actualStats); } + public void testNonCancellationWhenSearchTrafficIsNotQualifyingForCancellation() { + TaskManager mockTaskManager = spy(taskManager); + TaskResourceTrackingService mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); + AtomicLong mockTime = new AtomicLong(0); + LongSupplier mockTimeNanosSupplier = mockTime::get; + + EnumMap duressTrackers = new EnumMap<>(ResourceType.class) { + { + put(JVM, new NodeDuressTracker(() -> false, () -> 3)); + put(CPU, new NodeDuressTracker(() -> true, () -> 3)); + } + }; + + NodeDuressTrackers nodeDuressTrackers = new NodeDuressTrackers(duressTrackers); + + // Creating heap and cpu usage trackers where heap tracker will always evaluate with reasons to cancel the + // tasks but heap based cancellation should not happen because heap is not in duress + TaskResourceUsageTracker heapUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER, + (task) -> Optional.of(new TaskCancellation.Reason("mem exceeded", 10)) + ); + TaskResourceUsageTracker cpuUsageTracker = getMockedTaskResourceUsageTracker( + TaskResourceUsageTrackerType.CPU_USAGE_TRACKER, + (task) -> { + if (task.getTotalResourceStats().getCpuTimeInNanos() < 400) { + return Optional.empty(); + } + return Optional.of(new TaskCancellation.Reason("cpu time limit exceeded", 5)); + } + ); + + TaskResourceUsageTrackers taskResourceUsageTrackers = new TaskResourceUsageTrackers(); + taskResourceUsageTrackers.addTracker(cpuUsageTracker, TaskResourceUsageTrackerType.CPU_USAGE_TRACKER); + taskResourceUsageTrackers.addTracker(heapUsageTracker, TaskResourceUsageTrackerType.HEAP_USAGE_TRACKER); + + // Mocking 'settings' with predictable rate limiting thresholds. + SearchBackpressureSettings settings = getBackpressureSettings("enforced", 0.1, 0.003, 10.0); + + SearchBackpressureService service = spy( + new SearchBackpressureService( + settings, + mockTaskResourceTrackingService, + threadPool, + mockTimeNanosSupplier, + nodeDuressTrackers, + taskResourceUsageTrackers, + new TaskResourceUsageTrackers(), + mockTaskManager + ) + ); + + when(service.isHeapUsageDominatedBySearch(anyList(), anyDouble())).thenReturn(false); + + service.doRun(); + service.doRun(); + + SearchTaskSettings searchTaskSettings = mock(SearchTaskSettings.class); + // setting the total heap percent threshold to minimum so that circuit does not break in SearchBackpressureService + when(searchTaskSettings.getTotalHeapPercentThreshold()).thenReturn(0.0); + when(settings.getSearchTaskSettings()).thenReturn(searchTaskSettings); + + // Create a mix of low and high resource usage tasks (60 low + 15 high resource usage tasks). + Map activeSearchTasks = new HashMap<>(); + for (long i = 0; i < 75; i++) { + Class taskType = randomBoolean() ? SearchTask.class : SearchShardTask.class; + if (i % 5 == 0) { + activeSearchTasks.put(i, createMockTaskWithResourceStats(taskType, 500, 800, i)); + } else { + activeSearchTasks.put(i, createMockTaskWithResourceStats(taskType, 100, 800, i)); + } + } + doReturn(activeSearchTasks).when(mockTaskResourceTrackingService).getResourceAwareTasks(); + + // this will trigger cancellation but the cancellation should not happen as the node is not is duress because of search traffic + service.doRun(); + + verify(mockTaskManager, times(0)).cancelTaskAndDescendants(any(), anyString(), anyBoolean(), any()); + assertEquals(0, service.getSearchBackpressureState(SearchTask.class).getCancellationCount()); + assertEquals(0, service.getSearchBackpressureState(SearchShardTask.class).getCancellationCount()); + } + private SearchBackpressureSettings getBackpressureSettings(String mode, double ratio, double rate, double burst) { return spy( new SearchBackpressureSettings( From 214febafc55c7ebf6b292869741c6a03d750f2eb Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Thu, 20 Jun 2024 11:36:32 -0700 Subject: [PATCH 13/16] add missing javadoc to resourece type enum Signed-off-by: Kaushal Kumar --- server/src/main/java/org/opensearch/search/ResourceType.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/main/java/org/opensearch/search/ResourceType.java b/server/src/main/java/org/opensearch/search/ResourceType.java index 80b27a02d0056..9c33a94a66a7d 100644 --- a/server/src/main/java/org/opensearch/search/ResourceType.java +++ b/server/src/main/java/org/opensearch/search/ResourceType.java @@ -8,6 +8,9 @@ package org.opensearch.search; +/** + * Enum to hold the resource type + */ public enum ResourceType { CPU("cpu"), JVM("jvm"); From becc022e06cd9216d6a636470d72a7f85d05227f Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Thu, 20 Jun 2024 15:54:36 -0700 Subject: [PATCH 14/16] add javadoc to a method Signed-off-by: Kaushal Kumar --- server/src/main/java/org/opensearch/search/ResourceType.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/main/java/org/opensearch/search/ResourceType.java b/server/src/main/java/org/opensearch/search/ResourceType.java index 9c33a94a66a7d..1c86ad03e3516 100644 --- a/server/src/main/java/org/opensearch/search/ResourceType.java +++ b/server/src/main/java/org/opensearch/search/ResourceType.java @@ -21,6 +21,11 @@ public enum ResourceType { this.name = name; } + /** + * The string match here is case-sensitive + * @param s + * @return a {@link ResourceType} + */ public static ResourceType fromName(String s) { for (ResourceType resourceType : values()) { if (resourceType.getName().equals(s)) { From bd55e42440c48b5feb5714eb91818135de2b0f40 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Fri, 21 Jun 2024 06:35:34 -0700 Subject: [PATCH 15/16] fix javadoc warnings Signed-off-by: Kaushal Kumar --- .../java/org/opensearch/search/ResourceType.java | 2 +- .../trackers/TaskResourceUsageTrackers.java | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/ResourceType.java b/server/src/main/java/org/opensearch/search/ResourceType.java index 1c86ad03e3516..5bbcd7de1c2ce 100644 --- a/server/src/main/java/org/opensearch/search/ResourceType.java +++ b/server/src/main/java/org/opensearch/search/ResourceType.java @@ -23,7 +23,7 @@ public enum ResourceType { /** * The string match here is case-sensitive - * @param s + * @param s name matching the resource type name * @return a {@link ResourceType} */ public static ResourceType fromName(String s) { diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index d7ff47b4c6049..3a370a319cee5 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -35,8 +35,8 @@ public TaskResourceUsageTrackers() { /** * adds the tracker for the TrackerType - * @param tracker - * @param trackerType + * @param tracker is {@link TaskResourceUsageTracker} implementation which will be added + * @param trackerType is {@link TaskResourceUsageTrackerType} which depicts the implementation type */ public void addTracker(final TaskResourceUsageTracker tracker, final TaskResourceUsageTrackerType trackerType) { all.put(trackerType, tracker); @@ -44,7 +44,8 @@ public void addTracker(final TaskResourceUsageTracker tracker, final TaskResourc /** * getter for tracker for a {@link TaskResourceUsageTrackerType} - * @return + * @param type for which the implementation is returned + * @return the {@link TaskResourceUsageTrackerType} */ public Optional getTracker(TaskResourceUsageTrackerType type) { return Optional.ofNullable(all.get(type)); @@ -52,7 +53,7 @@ public Optional getTracker(TaskResourceUsageTrackerTyp /** * Method to access all available {@link TaskResourceUsageTracker} - * @return + * @return all enabled and available {@link TaskResourceUsageTracker}s */ public List all() { return new ArrayList<>(all.values()); @@ -71,7 +72,7 @@ public static abstract class TaskResourceUsageTracker { /** * for test purposes only - * @param resourceUsageBreachEvaluator + * @param resourceUsageBreachEvaluator which suggests whether a task should be cancelled or not */ public void setResourceUsageBreachEvaluator(final ResourceUsageBreachEvaluator resourceUsageBreachEvaluator) { this.resourceUsageBreachEvaluator = resourceUsageBreachEvaluator; @@ -109,7 +110,7 @@ public Optional checkAndMaybeGetCancellationReason(Task /** * Method to get taskCancellations due to this tracker for the given {@link CancellableTask} tasks - * @param tasks + * @param tasks cancellation eligible tasks due to node duress and search traffic threshold breach * @return */ public List getTaskCancellations(List tasks) { @@ -138,7 +139,7 @@ public interface Stats extends ToXContentObject, Writeable {} public interface ResourceUsageBreachEvaluator { /** * evaluates whether the task is eligible for cancellation based on {@link TaskResourceUsageTracker} implementation - * @param task + * @param task is input to this method on which the cancellation evaluation is performed * @return a {@link TaskCancellation.Reason} why this task should be cancelled otherwise empty */ public Optional evaluate(final Task task); From 0e38dee0d6c27ae12ca3175b1b75dea384126877 Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Fri, 21 Jun 2024 06:48:29 -0700 Subject: [PATCH 16/16] fix javadoc warnings Signed-off-by: Kaushal Kumar --- .../search/backpressure/SearchBackpressureService.java | 10 +++++----- .../trackers/TaskResourceUsageTrackers.java | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index 12152cb1b5e43..3e8ed3070e4ef 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -256,9 +256,9 @@ void doRun() { /** * Had to define this method to help mock this static method to test the scenario where SearchTraffic should not be * penalised when not breaching the threshold - * @param searchTasks - * @param threshold - * @return + * @param searchTasks inFlight co-ordinator requests + * @param threshold miniumum jvm allocated bytes ratio w.r.t. available heap + * @return a boolean value based on whether the threshold is breached */ boolean isHeapUsageDominatedBySearch(List searchTasks, double threshold) { return HeapUsageTracker.isHeapUsageDominatedBySearch(searchTasks, threshold); @@ -299,8 +299,8 @@ private List addResourceTrackerBasedCancellations( /** * Method to reduce the taskCancellations into unique bunch - * @param taskCancellations - * @return + * @param taskCancellations all task cancellations + * @return unique task cancellations */ private List mergeTaskCancellations(final List taskCancellations) { final Map uniqueTaskCancellations = new HashMap<>(); diff --git a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java index 3a370a319cee5..3b0072288681c 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java +++ b/server/src/main/java/org/opensearch/search/backpressure/trackers/TaskResourceUsageTrackers.java @@ -111,7 +111,7 @@ public Optional checkAndMaybeGetCancellationReason(Task /** * Method to get taskCancellations due to this tracker for the given {@link CancellableTask} tasks * @param tasks cancellation eligible tasks due to node duress and search traffic threshold breach - * @return + * @return the list of tasks which are breaching task level thresholds for this {@link TaskResourceUsageTracker} */ public List getTaskCancellations(List tasks) { return tasks.stream()