Skip to content

Commit

Permalink
Add unit tests and copyright headers; fix pylint-reported issues
Browse files Browse the repository at this point in the history
Signed-off-by: Liyun Xiu <[email protected]>
  • Loading branch information
chishui committed Apr 28, 2024
1 parent 62b337b commit 4800397
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 11 deletions.
23 changes: 23 additions & 0 deletions osbenchmark/tuning/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
53 changes: 45 additions & 8 deletions osbenchmark/tuning/optimal_finder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys
import csv
Expand All @@ -20,7 +44,10 @@ def get_benchmark_params(args, batch_size, bulk_size, number_of_client, temp_out
# we only test remote cluster
params["--pipeline"] = "benchmark-only"
params["--telemetry"] = "node-stats"
params["--telemetry-params"] = "node-stats-include-indices:true,node-stats-sample-interval:10,node-stats-include-mem:true,node-stats-include-process:true"
params["--telemetry-params"] = ("node-stats-include-indices:true,"
"node-stats-sample-interval:10,"
"node-stats-include-mem:true,"
"node-stats-include-process:true")
params["--workload-path"] = args.workload_path
params["--workload-params"] = get_workload_params(batch_size, bulk_size, number_of_client)
# generate output
Expand Down Expand Up @@ -52,7 +79,7 @@ def run_benchmark(params):
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)

stdout, stderr = proc.communicate()
_, stderr = proc.communicate()
return proc.returncode == 0, stderr.decode('ascii')
except KeyboardInterrupt as e:
proc.terminate()
Expand All @@ -67,10 +94,10 @@ def generate_random_index_name():
def run_batch_bulk_client_tests(args, test_id, batch, bulk, client):
logger = logging.getLogger(__name__)
result = Result(test_id, batch, bulk, client)
new_file, filename = tempfile.mkstemp()
_, filename = tempfile.mkstemp()
params = get_benchmark_params(args, batch, bulk, client, filename)

logger.info(f"test_id: {test_id}, batch: {batch}, bulk:{bulk}, client:{client}")
logger.info("test_id: %s, batch: %d, bulk: %d, client: %d", test_id, batch, bulk, client)
success = False
err = None
start = timer()
Expand All @@ -79,7 +106,7 @@ def run_batch_bulk_client_tests(args, test_id, batch, bulk, client):
finally:
end = timer()
if success:
with open(filename, newline='') as csvfile:
with open(filename, 'r', newline='') as csvfile:
line_reader = csv.reader(csvfile, delimiter=',')
output = {}
for row in line_reader:
Expand All @@ -102,22 +129,32 @@ def batch_bulk_client_tuning(args):
batches = batch_schedule.steps
bulks = bulk_schedule.steps
number_of_clients = client_schedule.steps
success_result_ids = []

total = len(batches) * len(bulks) * len(number_of_clients)
print(f"There will be {total} tests to run with {len(batches)} batch sizes, { len(bulks)} bulk sizes, "
f"{len(number_of_clients)} client numbers.")

schedule_runner = ScheduleRunner(args, batch_schedule, bulk_schedule, client_schedule)
results = schedule_runner.run(run_batch_bulk_client_tests)
optimal = find_optimal_result([results[id] for id in success_result_ids])

successful_result_ids = get_successful_ids(results, float(args.allowed_error_rate))
optimal = find_optimal_result([results[result_id] for result_id in successful_result_ids])
if not optimal:
print("All tests failed, couldn't find any results!")
else:
print(f"the optimal batch size is: {optimal.batch_size}")
print(f"the optimal variable combination is: bulk size: {optimal.bulk_size}, "
f"batch size: {optimal.batch_size}, number of clients: {optimal.number_of_client}")
return results


def get_successful_ids(results, allowed_error_rate):
    """Return the test ids of results that succeeded within the allowed error rate.

    :param results: iterable of ``Result`` objects; each must expose
        ``success``, ``error_rate`` and ``test_id`` attributes.
        NOTE(review): the caller in ``batch_bulk_client_tuning`` passes the
        mapping returned by ``ScheduleRunner.run`` — confirm that iterating it
        yields ``Result`` objects and not dictionary keys.
    :param allowed_error_rate: maximum error rate (inclusive) a successful
        result may report and still be counted.
    :return: list of ``test_id`` values for the qualifying results, in
        iteration order.
    """
    return [result.test_id
            for result in results
            if result.success and result.error_rate <= allowed_error_rate]


def find_optimal_result(results):
total_time = sys.maxsize
optimal = None
Expand Down
27 changes: 25 additions & 2 deletions osbenchmark/tuning/result.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,31 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

ERROR_RATE_KEY = "error rate"


class Result(object):
class Result:
def __init__(self, test_id, batch_size, bulk_size, number_of_client):
self.success = None
self.test_id = test_id
Expand All @@ -19,4 +43,3 @@ def set_output(self, success, total_time, output):
return
self.output = output
self.error_rate = float(output[ERROR_RATE_KEY]) if ERROR_RATE_KEY in output else 0 # percentage

26 changes: 25 additions & 1 deletion osbenchmark/tuning/schedule.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import uuid
import itertools
from functools import partial
Expand Down Expand Up @@ -35,7 +59,7 @@ def exceeding_and_equal_check(bound, trend, current):
return current <= bound


class Schedule(object):
class Schedule:
def __init__(self, single_val, schedule_val, default_minimal, default_maximal, default_step_size):
self.default_step_size = default_step_size
self.default_maximal = default_maximal
Expand Down
62 changes: 62 additions & 0 deletions tests/tuning/optimal_finder_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
from osbenchmark.tuning.optimal_finder import find_optimal_result, get_successful_ids
from osbenchmark.tuning.result import Result


@pytest.fixture()
def results():
    """Provide four fresh Result objects with distinct test ids and zeroed sizes."""
    return [Result(f"id{index}", 0, 0, 0) for index in range(1, 5)]


def test_find_optimal_result(results):
    """The successful result with the smallest total time should be selected."""
    for result, total_time in zip(results, (25, 15, 45, 125)):
        result.set_output(True, total_time, None)
    assert find_optimal_result(results).test_id == "id2"


def test_get_successful_ids_all_failed(results):
    """No ids are returned when every result reports failure."""
    for result, total_time in zip(results, (25, 15, 45, 125)):
        result.set_output(False, total_time, None)
    assert len(get_successful_ids(results, 0)) == 0


def test_get_successful_ids_error_rate(results):
    """Only successful results at or below the allowed error rate are counted."""
    outcomes = (
        (False, 25, 0.1),
        (True, 15, 0.2),
        (True, 45, 0.3),
        (True, 125, 0.4),
    )
    for result, (success, total_time, error_rate) in zip(results, outcomes):
        result.set_output(success, total_time, {"error rate": error_rate})
    # Thresholds chosen to admit one, two, and three results respectively;
    # the failed result is excluded regardless of its error rate.
    assert len(get_successful_ids(results, 0.21)) == 1
    assert len(get_successful_ids(results, 0.31)) == 2
    assert len(get_successful_ids(results, 0.4)) == 3
64 changes: 64 additions & 0 deletions tests/tuning/schedule_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase
from osbenchmark.tuning.schedule import Schedule, ScheduleRunner


class TestSchedule(TestCase):
    """Unit tests for Schedule step-list generation."""

    def test_Schedule_with_batch_size(self):
        # A single explicit value yields a one-step schedule.
        self.assertEqual([1], Schedule("1", None, 0, 0, 0).steps)

    def test_Schedule_with_schedule_val(self):
        # Each case pairs constructor arguments with the expected steps.
        cases = (
            ((None, "10:100:1:10", 0, 0, 0), list(range(10, 101, 10))),
            (("1", "10:100:-11:10", 0, 0, 0), list(range(100, 9, -10))),
            (("1", "@10:20:100", 0, 0, 0), [10, 20, 100]),
            ((None, "10", 0, 100, 20), [10, 30, 50, 70, 90, 100]),
        )
        for arguments, expected_steps in cases:
            self.assertEqual(expected_steps, Schedule(*arguments).steps)


class FakeSchedule:
    """Minimal stand-in for Schedule that exposes only a ``steps`` attribute."""

    def __init__(self, steps):
        # Store the precomputed steps directly rather than deriving them.
        self.steps = steps


def fake_callback(args, test_id, arg1, arg2):
    """Capture the arguments a ScheduleRunner passes to its per-test callback."""
    captured = {"args": args, "arg1": arg1, "arg2": arg2}
    return captured


class TestScheduleRunner(TestCase):
    """Unit tests for ScheduleRunner fan-out across schedules."""

    def test_ScheduleRunner(self):
        # Two two-step schedules should produce the full 2x2 cross product.
        runner = ScheduleRunner({}, FakeSchedule([1, 2]), FakeSchedule([4, 5]))
        outcome = runner.run(fake_callback)
        observed = {(entry["arg1"], entry["arg2"]) for entry in outcome.values()}
        self.assertEqual(observed, {(1, 4), (2, 4), (1, 5), (2, 5)})

0 comments on commit 4800397

Please sign in to comment.