From f64c84b99d442632be2276d31c1c078ce0f3c700 Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian@gmail.com>
Date: Thu, 9 Jan 2025 14:42:24 -0600
Subject: [PATCH] [tuner] add benchmark/baseline validation helpers and tests

Signed-off-by: Bangtian Liu <liubangtian@gmail.com>
---
 tuner/tuner/libtuner.py      | 46 ++++++++++++++++++++++++++++++++++++
 tuner/tuner/libtuner_test.py | 37 +++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/tuner/tuner/libtuner.py b/tuner/tuner/libtuner.py
index 8c5b15761..28c2e4429 100644
--- a/tuner/tuner/libtuner.py
+++ b/tuner/tuner/libtuner.py
@@ -221,6 +221,86 @@ def validate_devices(user_devices: list[str]) -> None:
         )
 
 
+def validate_benchmark_results(
+    benchmark_results: list[BenchmarkResult],
+) -> list[BenchmarkResult]:
+    """Drops benchmark results whose time is not finite.
+
+    Logs an error when no result passes the filter.
+
+    Args:
+        benchmark_results: Results to filter; only each result's `time` is read.
+
+    Returns:
+        A new list with the finite-time results, in their original order.
+    """
+    filtered_benchmark_results = [r for r in benchmark_results if math.isfinite(r.time)]
+    if not filtered_benchmark_results:
+        logging.error("No successful candidate benchmarks.")
+
+    return filtered_benchmark_results
+
+
+def map_baseline_by_device(baseline_results: list[BenchmarkResult]) -> dict[str, float]:
+    """Maps each baseline result's device ID to its benchmark time.
+
+    If several results share a device ID, the last one in the list wins.
+    """
+    return {r.device_id: r.time for r in baseline_results}
+
+
+def validate_baselines_device_ids_match(
+    first_baseline_by_device: dict[str, float],
+    second_baseline_by_device: dict[str, float],
+) -> bool:
+    """Returns True iff both baselines cover exactly the same set of device IDs."""
+    return first_baseline_by_device.keys() == second_baseline_by_device.keys()
+
+
+def validate_baseline_regression(
+    first_baseline_by_device: dict[str, float],
+    second_baseline_by_device: dict[str, float],
+    regression_threshold: float = 1.03,
+) -> bool:
+    """Checks whether any device got slower between two baseline runs.
+
+    A device counts as regressed when its second baseline time exceeds its first
+    baseline time multiplied by `regression_threshold`. Every regression is logged
+    as a warning. Devices missing from the second baseline are skipped.
+
+    Args:
+        first_baseline_by_device: Device ID -> time from the first baseline run.
+        second_baseline_by_device: Device ID -> time from the second baseline run.
+        regression_threshold: Allowed slowdown factor; the default tolerates 3%.
+
+    Returns:
+        True if at least one device regressed, False otherwise.
+    """
+    regression_detected = False
+    for device_id, first_baseline_time in first_baseline_by_device.items():
+        if device_id not in second_baseline_by_device:
+            continue
+        second_baseline_time = second_baseline_by_device[device_id]
+
+        if second_baseline_time > first_baseline_time * regression_threshold:
+            if first_baseline_time != 0:
+                percentage_slower = (
+                    (second_baseline_time - first_baseline_time) / first_baseline_time
+                ) * 100
+            else:
+                # The relative slowdown is undefined for a zero baseline; report it
+                # as infinite instead of raising ZeroDivisionError.
+                percentage_slower = math.inf
+            logging.warning(
+                f"Performance regression detected on device {device_id}: "
+                f"Baseline time = {first_baseline_time}, Post-baseline time = {second_baseline_time}, "
+                f"Slower by {percentage_slower:.3f}%"
+            )
+            regression_detected = True
+
+    return regression_detected
+
+
 class ExecutionPhases(str, Enum):
     dont_stop = ""
     generate_candidates = "generate-candidates"
diff --git a/tuner/tuner/libtuner_test.py b/tuner/tuner/libtuner_test.py
index cad57a3cd..1ef32daf1 100644
--- a/tuner/tuner/libtuner_test.py
+++ b/tuner/tuner/libtuner_test.py
@@ -233,3 +233,73 @@ def test_select_best_benchmark_results() -> None:
 
 def test_enum_collision():
     from iree.compiler.dialects import linalg, vector, iree_gpu, iree_codegen, iree_input  # type: ignore
+
+
+def test_validate_benchmark_results():
+    # A lone failed (infinite-time) result leaves nothing to keep.
+    benchmark_results = [
+        libtuner.BenchmarkResult(0, math.inf, "hip://0"),
+    ]
+
+    result = libtuner.validate_benchmark_results(benchmark_results)
+    assert result == []
+
+    # Only the finite-time result is kept.
+    benchmark_results = [
+        libtuner.BenchmarkResult(0, math.inf, "hip://0"),
+        libtuner.BenchmarkResult(0, 0.1, "hip://1"),
+    ]
+    result = libtuner.validate_benchmark_results(benchmark_results)
+    assert len(result) == 1
+    assert result[0].candidate_id == 0
+    assert result[0].time == 0.1
+    assert result[0].device_id == "hip://1"
+
+
+def test_map_baseline_by_device():
+    assert libtuner.map_baseline_by_device([]) == {}
+
+    baseline_results = [
+        libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
+        libtuner.BenchmarkResult(0, 2000.0, "hip://1"),
+    ]
+    assert libtuner.map_baseline_by_device(baseline_results) == {
+        "hip://0": 1000.0,
+        "hip://1": 2000.0,
+    }
+
+
+def test_validate_baselines_device_id_match():
+    # Different device sets must not match.
+    first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
+    second_baseline = {"hip://1": 1500.0, "hip://2": 2500.0}
+
+    result = libtuner.validate_baselines_device_ids_match(
+        first_baseline, second_baseline
+    )
+    assert result is False
+
+    # Same devices with different times still match.
+    first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
+    second_baseline = {"hip://0": 1500.0, "hip://1": 2500.0}
+
+    result = libtuner.validate_baselines_device_ids_match(
+        first_baseline, second_baseline
+    )
+    assert result is True
+
+
+def test_validate_baseline_regression():
+    first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
+
+    # Within the 3% tolerance on every device: no regression.
+    second_baseline = {"hip://0": 1010.0, "hip://1": 1900.0}
+    assert not libtuner.validate_baseline_regression(first_baseline, second_baseline)
+
+    # More than 3% slower on one device: regression.
+    second_baseline = {"hip://0": 1100.0, "hip://1": 2000.0}
+    assert libtuner.validate_baseline_regression(first_baseline, second_baseline)
+
+    # Devices missing from the second baseline are skipped.
+    second_baseline = {"hip://2": 5000.0}
+    assert not libtuner.validate_baseline_regression(first_baseline, second_baseline)