Skip to content

Commit

Permalink
Add unit tests and copyright headers; fix pylint-reported issues
Browse files Browse the repository at this point in the history
Signed-off-by: Liyun Xiu <[email protected]>
  • Loading branch information
chishui committed Apr 28, 2024
1 parent 62b337b commit 4800397
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 11 deletions.
23 changes: 23 additions & 0 deletions osbenchmark/tuning/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
53 changes: 45 additions & 8 deletions osbenchmark/tuning/optimal_finder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys
import csv
Expand All @@ -20,7 +44,10 @@ def get_benchmark_params(args, batch_size, bulk_size, number_of_client, temp_out
# we only test remote cluster
params["--pipeline"] = "benchmark-only"
params["--telemetry"] = "node-stats"
params["--telemetry-params"] = "node-stats-include-indices:true,node-stats-sample-interval:10,node-stats-include-mem:true,node-stats-include-process:true"
params["--telemetry-params"] = ("node-stats-include-indices:true,"
"node-stats-sample-interval:10,"
"node-stats-include-mem:true,"
"node-stats-include-process:true")
params["--workload-path"] = args.workload_path
params["--workload-params"] = get_workload_params(batch_size, bulk_size, number_of_client)
# generate output
Expand Down Expand Up @@ -52,7 +79,7 @@ def run_benchmark(params):
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)

stdout, stderr = proc.communicate()
_, stderr = proc.communicate()
return proc.returncode == 0, stderr.decode('ascii')
except KeyboardInterrupt as e:
proc.terminate()
Expand All @@ -67,10 +94,10 @@ def generate_random_index_name():
def run_batch_bulk_client_tests(args, test_id, batch, bulk, client):
logger = logging.getLogger(__name__)
result = Result(test_id, batch, bulk, client)
new_file, filename = tempfile.mkstemp()
_, filename = tempfile.mkstemp()
params = get_benchmark_params(args, batch, bulk, client, filename)

logger.info(f"test_id: {test_id}, batch: {batch}, bulk:{bulk}, client:{client}")
logger.info("test_id: %s, batch: %d, bulk: %d, client: %d", test_id, batch, bulk, client)
success = False
err = None
start = timer()
Expand All @@ -79,7 +106,7 @@ def run_batch_bulk_client_tests(args, test_id, batch, bulk, client):
finally:
end = timer()
if success:
with open(filename, newline='') as csvfile:
with open(filename, 'r', newline='') as csvfile:
line_reader = csv.reader(csvfile, delimiter=',')
output = {}
for row in line_reader:
Expand All @@ -102,22 +129,32 @@ def batch_bulk_client_tuning(args):
batches = batch_schedule.steps
bulks = bulk_schedule.steps
number_of_clients = client_schedule.steps
success_result_ids = []

total = len(batches) * len(bulks) * len(number_of_clients)
print(f"There will be {total} tests to run with {len(batches)} batch sizes, { len(bulks)} bulk sizes, "
f"{len(number_of_clients)} client numbers.")

schedule_runner = ScheduleRunner(args, batch_schedule, bulk_schedule, client_schedule)
results = schedule_runner.run(run_batch_bulk_client_tests)
optimal = find_optimal_result([results[id] for id in success_result_ids])

successful_result_ids = get_successful_ids(results, float(args.allowed_error_rate))
optimal = find_optimal_result([results[result_id] for result_id in successful_result_ids])
if not optimal:
print("All tests failed, couldn't find any results!")
else:
print(f"the optimal batch size is: {optimal.batch_size}")
print(f"the optimal variable combination is: bulk size: {optimal.bulk_size}, "
f"batch size: {optimal.batch_size}, number of clients: {optimal.number_of_client}")
return results


def get_successful_ids(results, allowed_error_rate):
    """Return the test ids of results that succeeded within the allowed error rate.

    :param results: iterable of ``Result`` objects; each must expose
        ``success``, ``error_rate`` and ``test_id`` attributes.
        NOTE(review): the caller in ``batch_bulk_client_tuning`` passes the
        mapping returned by ``ScheduleRunner.run`` — confirm that iterating it
        yields ``Result`` objects and not dictionary keys.
    :param allowed_error_rate: maximum error rate (inclusive) a successful
        result may report and still be counted.
    :return: list of ``test_id`` values for the qualifying results, in
        iteration order.
    """
    return [result.test_id
            for result in results
            if result.success and result.error_rate <= allowed_error_rate]


def find_optimal_result(results):
total_time = sys.maxsize
optimal = None
Expand Down
27 changes: 25 additions & 2 deletions osbenchmark/tuning/result.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,31 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

ERROR_RATE_KEY = "error rate"


class Result(object):
class Result:
def __init__(self, test_id, batch_size, bulk_size, number_of_client):
self.success = None
self.test_id = test_id
Expand All @@ -19,4 +43,3 @@ def set_output(self, success, total_time, output):
return
self.output = output
self.error_rate = float(output[ERROR_RATE_KEY]) if ERROR_RATE_KEY in output else 0 # percentage

26 changes: 25 additions & 1 deletion osbenchmark/tuning/schedule.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import uuid
import itertools
from functools import partial
Expand Down Expand Up @@ -35,7 +59,7 @@ def exceeding_and_equal_check(bound, trend, current):
return current <= bound


class Schedule(object):
class Schedule:
def __init__(self, single_val, schedule_val, default_minimal, default_maximal, default_step_size):
self.default_step_size = default_step_size
self.default_maximal = default_maximal
Expand Down
62 changes: 62 additions & 0 deletions tests/tuning/optimal_finder_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
from osbenchmark.tuning.optimal_finder import find_optimal_result, get_successful_ids
from osbenchmark.tuning.result import Result


@pytest.fixture()
def results():
    """Provide four fresh Result objects with distinct test ids and zeroed sizes."""
    return [Result(f"id{index}", 0, 0, 0) for index in range(1, 5)]


def test_find_optimal_result(results):
    """The successful result with the smallest total time should be selected."""
    for result, total_time in zip(results, (25, 15, 45, 125)):
        result.set_output(True, total_time, None)
    assert find_optimal_result(results).test_id == "id2"


def test_get_successful_ids_all_failed(results):
    """No ids are returned when every result reports failure."""
    for result, total_time in zip(results, (25, 15, 45, 125)):
        result.set_output(False, total_time, None)
    assert len(get_successful_ids(results, 0)) == 0


def test_get_successful_ids_error_rate(results):
    """Only successful results at or below the allowed error rate are counted."""
    outcomes = (
        (False, 25, 0.1),
        (True, 15, 0.2),
        (True, 45, 0.3),
        (True, 125, 0.4),
    )
    for result, (success, total_time, error_rate) in zip(results, outcomes):
        result.set_output(success, total_time, {"error rate": error_rate})
    # Thresholds chosen to admit one, two, and three results respectively;
    # the failed result is excluded regardless of its error rate.
    assert len(get_successful_ids(results, 0.21)) == 1
    assert len(get_successful_ids(results, 0.31)) == 2
    assert len(get_successful_ids(results, 0.4)) == 3
64 changes: 64 additions & 0 deletions tests/tuning/schedule_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase
from osbenchmark.tuning.schedule import Schedule, ScheduleRunner


class TestSchedule(TestCase):
    """Unit tests for Schedule step-list generation."""

    def test_Schedule_with_batch_size(self):
        # A single explicit value yields a one-step schedule.
        self.assertEqual([1], Schedule("1", None, 0, 0, 0).steps)

    def test_Schedule_with_schedule_val(self):
        # Each case pairs constructor arguments with the expected steps.
        cases = (
            ((None, "10:100:1:10", 0, 0, 0), list(range(10, 101, 10))),
            (("1", "10:100:-11:10", 0, 0, 0), list(range(100, 9, -10))),
            (("1", "@10:20:100", 0, 0, 0), [10, 20, 100]),
            ((None, "10", 0, 100, 20), [10, 30, 50, 70, 90, 100]),
        )
        for arguments, expected_steps in cases:
            self.assertEqual(expected_steps, Schedule(*arguments).steps)


class FakeSchedule:
    """Minimal stand-in for Schedule that exposes only a ``steps`` attribute."""

    def __init__(self, steps):
        # Store the precomputed steps directly rather than deriving them.
        self.steps = steps


def fake_callback(args, test_id, arg1, arg2):
    """Capture the arguments a ScheduleRunner passes to its per-test callback."""
    captured = {"args": args, "arg1": arg1, "arg2": arg2}
    return captured


class TestScheduleRunner(TestCase):
    """Unit tests for ScheduleRunner fan-out across schedules."""

    def test_ScheduleRunner(self):
        # Two two-step schedules should produce the full 2x2 cross product.
        runner = ScheduleRunner({}, FakeSchedule([1, 2]), FakeSchedule([4, 5]))
        outcome = runner.run(fake_callback)
        observed = {(entry["arg1"], entry["arg2"]) for entry in outcome.values()}
        self.assertEqual(observed, {(1, 4), (2, 4), (1, 5), (2, 5)})

0 comments on commit 4800397

Please sign in to comment.