diff --git a/osbenchmark/tuning/__init__.py b/osbenchmark/tuning/__init__.py
index e69de29bb..e7a26e8e3 100644
--- a/osbenchmark/tuning/__init__.py
+++ b/osbenchmark/tuning/__init__.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/osbenchmark/tuning/optimal_finder.py b/osbenchmark/tuning/optimal_finder.py
index 17396e283..67d772502 100644
--- a/osbenchmark/tuning/optimal_finder.py
+++ b/osbenchmark/tuning/optimal_finder.py
@@ -1,3 +1,27 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import os
 import sys
 import csv
@@ -20,7 +44,10 @@ def get_benchmark_params(args, batch_size, bulk_size, number_of_client, temp_out
     # we only test remote cluster
     params["--pipeline"] = "benchmark-only"
     params["--telemetry"] = "node-stats"
-    params["--telemetry-params"] = "node-stats-include-indices:true,node-stats-sample-interval:10,node-stats-include-mem:true,node-stats-include-process:true"
+    params["--telemetry-params"] = ("node-stats-include-indices:true,"
+                                    "node-stats-sample-interval:10,"
+                                    "node-stats-include-mem:true,"
+                                    "node-stats-include-process:true")
     params["--workload-path"] = args.workload_path
     params["--workload-params"] = get_workload_params(batch_size, bulk_size, number_of_client)
     # generate output
@@ -52,7 +79,7 @@ def run_benchmark(params):
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
 
-        stdout, stderr = proc.communicate()
+        _, stderr = proc.communicate()
         return proc.returncode == 0, stderr.decode('ascii')
     except KeyboardInterrupt as e:
         proc.terminate()
@@ -67,10 +94,10 @@ def generate_random_index_name():
 def run_batch_bulk_client_tests(args, test_id, batch, bulk, client):
     logger = logging.getLogger(__name__)
     result = Result(test_id, batch, bulk, client)
-    new_file, filename = tempfile.mkstemp()
+    _, filename = tempfile.mkstemp()
     params = get_benchmark_params(args, batch, bulk, client, filename)
 
-    logger.info(f"test_id: {test_id}, batch: {batch}, bulk:{bulk}, client:{client}")
+    logger.info("test_id: %s, batch: %d, bulk: %d, client: %d", test_id, batch, bulk, client)
     success = False
     err = None
     start = timer()
@@ -79,7 +106,7 @@
     finally:
         end = timer()
     if success:
-        with open(filename, newline='') as csvfile:
+        with open(filename, 'r', newline='') as csvfile:
             line_reader = csv.reader(csvfile, delimiter=',')
             output = {}
             for row in line_reader:
@@ -102,7 +129,6 @@ def batch_bulk_client_tuning(args):
     batches = batch_schedule.steps
     bulks = bulk_schedule.steps
     number_of_clients = client_schedule.steps
-    success_result_ids = []
 
     total = len(batches) * len(bulks) * len(number_of_clients)
     print(f"There will be {total} tests to run with {len(batches)} batch sizes, { len(bulks)} bulk sizes, "
@@ -110,14 +136,25 @@
     schedule_runner = ScheduleRunner(args, batch_schedule, bulk_schedule, client_schedule)
     results = schedule_runner.run(run_batch_bulk_client_tests)
-    optimal = find_optimal_result([results[id] for id in success_result_ids])
+
+    successful_result_ids = get_successful_ids(results, float(args.allowed_error_rate))
+    optimal = find_optimal_result([results[result_id] for result_id in successful_result_ids])
 
     if not optimal:
         print("All tests failed, couldn't find any results!")
     else:
-        print(f"the optimal batch size is: {optimal.batch_size}")
+        print(f"the optimal variable combination is: bulk size: {optimal.bulk_size}, "
+              f"batch size: {optimal.batch_size}, number of clients: {optimal.number_of_client}")
     return results
 
 
+def get_successful_ids(results, allowed_error_rate):
+    successful_ids = []
+    for result in results:
+        if result.success and result.error_rate <= allowed_error_rate:
+            successful_ids.append(result.test_id)
+    return successful_ids
+
+
 def find_optimal_result(results):
     total_time = sys.maxsize
     optimal = None
diff --git a/osbenchmark/tuning/result.py b/osbenchmark/tuning/result.py
index 7d906035f..1218c4327 100644
--- a/osbenchmark/tuning/result.py
+++ b/osbenchmark/tuning/result.py
@@ -1,7 +1,31 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 ERROR_RATE_KEY = "error rate"
 
 
-class Result(object):
+class Result:
     def __init__(self, test_id, batch_size, bulk_size, number_of_client):
         self.success = None
         self.test_id = test_id
@@ -19,4 +43,3 @@ def set_output(self, success, total_time, output):
             return
         self.output = output
         self.error_rate = float(output[ERROR_RATE_KEY]) if ERROR_RATE_KEY in output else 0 # percentage
-
diff --git a/osbenchmark/tuning/schedule.py b/osbenchmark/tuning/schedule.py
index e16f08113..6d01baa9d 100644
--- a/osbenchmark/tuning/schedule.py
+++ b/osbenchmark/tuning/schedule.py
@@ -1,3 +1,27 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import uuid
 import itertools
 from functools import partial
@@ -35,7 +59,7 @@ def exceeding_and_equal_check(bound, trend, current):
     return current <= bound
 
 
-class Schedule(object):
+class Schedule:
     def __init__(self, single_val, schedule_val, default_minimal, default_maximal, default_step_size):
         self.default_step_size = default_step_size
         self.default_maximal = default_maximal
diff --git a/tests/tuning/optimal_finder_test.py b/tests/tuning/optimal_finder_test.py
new file mode 100644
index 000000000..e211e4599
--- /dev/null
+++ b/tests/tuning/optimal_finder_test.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+from osbenchmark.tuning.optimal_finder import find_optimal_result, get_successful_ids
+from osbenchmark.tuning.result import Result
+
+
+@pytest.fixture()
+def results():
+    result1 = Result("id1", 0, 0, 0)
+    result2 = Result("id2", 0, 0, 0)
+    result3 = Result("id3", 0, 0, 0)
+    result4 = Result("id4", 0, 0, 0)
+    return [result1, result2, result3, result4]
+
+
+def test_find_optimal_result(results):
+    results[0].set_output(True, 25, None)
+    results[1].set_output(True, 15, None)
+    results[2].set_output(True, 45, None)
+    results[3].set_output(True, 125, None)
+    assert find_optimal_result(results).test_id == "id2"
+
+
+def test_get_successful_ids_all_failed(results):
+    results[0].set_output(False, 25, None)
+    results[1].set_output(False, 15, None)
+    results[2].set_output(False, 45, None)
+    results[3].set_output(False, 125, None)
+    assert len(get_successful_ids(results, 0)) == 0
+
+
+def test_get_successful_ids_error_rate(results):
+    results[0].set_output(False, 25, {"error rate": 0.1})
+    results[1].set_output(True, 15, {"error rate": 0.2})
+    results[2].set_output(True, 45, {"error rate": 0.3})
+    results[3].set_output(True, 125, {"error rate": 0.4})
+    assert len(get_successful_ids(results, 0.21)) == 1
+    assert len(get_successful_ids(results, 0.31)) == 2
+    assert len(get_successful_ids(results, 0.4)) == 3
diff --git a/tests/tuning/schedule_test.py b/tests/tuning/schedule_test.py
new file mode 100644
index 000000000..5316ad6a2
--- /dev/null
+++ b/tests/tuning/schedule_test.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from unittest import TestCase
+from osbenchmark.tuning.schedule import Schedule, ScheduleRunner
+
+
+class TestSchedule(TestCase):
+    def test_Schedule_with_batch_size(self):
+        schedule = Schedule("1", None, 0, 0, 0)
+        self.assertEqual([1], schedule.steps)
+
+    def test_Schedule_with_schedule_val(self):
+        schedule = Schedule(None, "10:100:1:10", 0, 0, 0)
+        self.assertEqual(list(range(10, 101, 10)), schedule.steps)
+
+        schedule = Schedule("1", "10:100:-11:10", 0, 0, 0)
+        self.assertEqual(list(range(100, 9, -10)), schedule.steps)
+
+        schedule = Schedule("1", "@10:20:100", 0, 0, 0)
+        self.assertEqual([10, 20, 100], schedule.steps)
+
+        schedule = Schedule(None, "10", 0, 100, 20)
+        self.assertEqual([10, 30, 50, 70, 90, 100], schedule.steps)
+
+
+class FakeSchedule:
+    def __init__(self, steps):
+        self.steps = steps
+
+
+def fake_callback(args, test_id, arg1, arg2):
+    return {"args": args, "arg1": arg1, "arg2": arg2}
+
+
+class TestScheduleRunner(TestCase):
+    def test_ScheduleRunner(self):
+        schedule1 = FakeSchedule([1, 2])
+        schedule2 = FakeSchedule([4, 5])
+        args = {}
+        runner = ScheduleRunner(args, schedule1, schedule2)
+        results = runner.run(fake_callback).values()
+        self.assertEqual({(result["arg1"], result["arg2"]) for result in results}, {(1,4), (2,4), (1,5), (2,5)})
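
For reference, a minimal usage sketch (not part of the patch) of the two helpers this change adds to optimal_finder.py; the Result values and timings below are made up, mirroring the fixtures in optimal_finder_test.py.

# Illustrative only: exercise get_successful_ids and find_optimal_result directly,
# the same way optimal_finder_test.py does, with invented timings and error rates.
from osbenchmark.tuning.optimal_finder import find_optimal_result, get_successful_ids
from osbenchmark.tuning.result import Result

results = [Result("id1", 10, 500, 1), Result("id2", 20, 1000, 2), Result("id3", 40, 2000, 4)]
results[0].set_output(True, 25, {"error rate": 0.1})   # succeeded, but error rate above the threshold
results[1].set_output(True, 15, {"error rate": 0.0})    # succeeded, fastest clean run
results[2].set_output(False, 45, None)                  # benchmark run failed

successful_ids = get_successful_ids(results, 0.05)       # -> ["id2"]
by_id = {result.test_id: result for result in results}
optimal = find_optimal_result([by_id[i] for i in successful_ids])
print(optimal.test_id if optimal else "no successful runs")  # -> id2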