[tuner] Run baseline benchmarks first and check regression
Signed-off-by: Bangtian Liu <[email protected]>
bangtianliu committed Jan 8, 2025
1 parent 0c53fb0 commit 46d6a87
Showing 1 changed file with 59 additions and 18 deletions.
77 changes: 59 additions & 18 deletions tuner/tuner/libtuner.py
@@ -793,24 +793,6 @@ def benchmark(
):
logging.debug("benchmark()")

task_list = [
BenchmarkPack(
iree_benchmark_module_flags=tuning_client.get_iree_benchmark_module_flags(),
benchmark_timeout=tuning_client.get_benchmark_timeout_s(),
candidate_tracker=candidate_trackers[i],
)
for i in compiled_candidates
if i != 0
]
worker_context_queue = create_worker_context_queue(args.devices)
candidate_results = multiprocess_progress_wrapper(
num_worker=len(args.devices),
task_list=task_list,
function=run_iree_benchmark_module_command,
initializer=init_worker_context,
initializer_inputs=(worker_context_queue,),
)

# Benchmarking baselines on each involved device.
worker_context_queue = create_worker_context_queue(args.devices)
baseline_task_list = [
@@ -831,6 +813,24 @@ def benchmark(
for r in baseline_results:
baseline_times_by_device[r.device_id] = r.time

task_list = [
BenchmarkPack(
iree_benchmark_module_flags=tuning_client.get_iree_benchmark_module_flags(),
benchmark_timeout=tuning_client.get_benchmark_timeout_s(),
candidate_tracker=candidate_trackers[i],
)
for i in compiled_candidates
if i != 0
]
worker_context_queue = create_worker_context_queue(args.devices)
candidate_results = multiprocess_progress_wrapper(
num_worker=len(args.devices),
task_list=task_list,
function=run_iree_benchmark_module_command,
initializer=init_worker_context,
initializer_inputs=(worker_context_queue,),
)

# Select top candidates
def get_speedup(result: BenchmarkResult) -> float:
return result.time / baseline_times_by_device[result.device_id]
@@ -848,4 +848,45 @@ def get_speedup(result: BenchmarkResult) -> float:
)

top_candidates = [result.candidate_id for result in best_results]

# Benchmarking baselines again on each involved device to check for performance regressions.
worker_context_queue = create_worker_context_queue(args.devices)
baseline_task_list = [
BenchmarkPack(
iree_benchmark_module_flags=tuning_client.get_iree_benchmark_module_flags(),
benchmark_timeout=tuning_client.get_benchmark_timeout_s(),
candidate_tracker=candidate_trackers[0],
)
] * len(args.devices)
post_baseline_results = multiprocess_progress_wrapper(
num_worker=len(args.devices),
task_list=baseline_task_list,
function=run_iree_benchmark_module_command,
initializer=init_worker_context,
initializer_inputs=(worker_context_queue,),
)
post_baseline_times_by_device = {}
for r in post_baseline_results:
post_baseline_times_by_device[r.device_id] = r.time

assert (
baseline_times_by_device.keys() == post_baseline_times_by_device.keys()
), "Error: The device IDs in baseline and post-baseline results do not match."

regression_detected = False
for device_id in baseline_times_by_device:
baseline_time = baseline_times_by_device[device_id]
post_time = post_baseline_times_by_device[device_id]

if post_time > baseline_time * 1.03:
regression_detected = True
percentage_slower = ((post_time - baseline_time) / baseline_time) * 100
logging.info(
f"Performance regression detected on device {device_id}: "
f"Baseline time = {baseline_time}, Post-baseline time = {post_time}, "
f"Slower by {percentage_slower:.3f}%"
)

if not regression_detected:
logging.info("No performance regressions detected.")
return top_candidates
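
For reference, a minimal standalone sketch of the regression check introduced by this commit, using hypothetical device IDs and timing values; in the tuner itself both dictionaries are filled from the BenchmarkResult objects returned by the baseline runs before and after the candidate benchmarks:

import logging

logging.basicConfig(level=logging.INFO)

# Hypothetical per-device baseline times (arbitrary units) measured before and
# after the candidate benchmarks; the real values come from BenchmarkResult.time.
baseline_times_by_device = {"hip://0": 1.20, "hip://1": 1.15}
post_baseline_times_by_device = {"hip://0": 1.27, "hip://1": 1.16}

assert baseline_times_by_device.keys() == post_baseline_times_by_device.keys()

regression_detected = False
for device_id, baseline_time in baseline_times_by_device.items():
    post_time = post_baseline_times_by_device[device_id]
    # A post-baseline time more than 3% above the original baseline is treated
    # as a regression on that device.
    if post_time > baseline_time * 1.03:
        regression_detected = True
        percentage_slower = (post_time - baseline_time) / baseline_time * 100
        logging.info(
            f"Performance regression detected on device {device_id}: "
            f"slower by {percentage_slower:.3f}%"
        )

if not regression_detected:
    logging.info("No performance regressions detected.")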
