Improve benchmark reproducibility

reiniscirpons · Aug 23, 2022 · 68c7f5d · 68c7f5d
1 parent 08132b6
commit 68c7f5d
Show file tree

Hide file tree

Showing 10 changed files with 190 additions and 113 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,8 +8,9 @@
 .coverage
 .tox/
 __pycache__/
-benchmarks/bench_minimize*
 benchmarks/samples/
+benchmarks/raw_benchmark_data/
+benchmarks/processed_benchmark_data/
 build/
 dist/
 docs/build/

diff --git a/Makefile b/Makefile
@@ -44,6 +44,9 @@ benchmark-equal:
 	pytest -n auto -v benchmarks/bench_equal.py --benchmark-save=equal
 
 INTERVAL_TEST_CASES = 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19
+define benchmark_function
+	pytest -n auto -v benchmarks/bench_$(1).py --benchmark-storage=file://./benchmarks/raw_benchmark_data/ --benchmark-save=$(2)
+endef
 
 benchmark-minimize-generate-benchmarks:
 	$(foreach var,$(INTERVAL_TEST_CASES),sed 's/{{NUM}}/$(var)/g' ./benchmarks/templates/bench_minimize.py > ./benchmarks/bench_minimize_$(var).py;)
@@ -52,19 +55,29 @@ benchmark-interval-multiply-generate-benchmarks:
 	$(foreach var,$(INTERVAL_TEST_CASES),sed 's/{{NUM}}/$(var)/g' ./benchmarks/templates/bench_interval_multiply.py > ./benchmarks/bench_interval_multiply_$(var).py;)
 
 benchmark-minimize: benchmark-minimize-generate-benchmarks
-	$(foreach var,$(INTERVAL_TEST_CASES),pytest -n 6 -v benchmarks/bench_minimize_$(var).py --benchmark-save=minimize;)
+	mkdir -p ./benchmarks/raw_benchmark_data/
+	rm -f ./benchmarks/raw_benchmark_data/*/*_minimize.json
+	$(foreach var,$(INTERVAL_TEST_CASES),$(call benchmark_function,minimize_$(var),minimize);)
 
 benchmark-interval-multiply: benchmark-interval-multiply-generate-benchmarks
-	$(foreach var,$(INTERVAL_TEST_CASES),pytest -n 6 -v benchmarks/bench_interval_multiply_$(var).py --benchmark-save=interval_multiply;)
+	mkdir -p ./benchmarks/raw_benchmark_data/
+	rm -f ./benchmarks/raw_benchmark_data/*/*_interval_multiply.json
+	$(foreach var,$(INTERVAL_TEST_CASES),$(call benchmark_function,interval_multiply_$(var),interval_multiply);)
 
 benchmark-minimal-multiply:
-	pytest -n auto -v benchmarks/bench_minimal_multiply.py --benchmark-save=minimal_multiply
+	mkdir -p ./benchmarks/raw_benchmark_data/
+	rm -f ./benchmarks/raw_benchmark_data/*/*_minimal_multiply.json
+	$(call benchmark_function,minimal_multiply,minimal_multiply)
 
 benchmark-isomorphism:
-	pytest -n auto -v benchmarks/bench_isomorphism.py --benchmark-save=isomorphism
+	mkdir -p ./benchmarks/raw_benchmark_data/
+	rm -f ./benchmarks/raw_benchmark_data/*/*_isomorphism.json
+	$(call benchmark_function,isomorphism,isomorphism)
 
 benchmark-minword:
-	pytest -n auto -v benchmarks/bench_minword.py --benchmark-save=minword
+	mkdir -p ./benchmarks/raw_benchmark_data/
+	rm -f ./benchmarks/raw_benchmark_data/*/*_minword.json
+	$(call benchmark_function,minword,minword)
 
 benchmark-all: benchmark-interval benchmark-equal  benchmark-minimize benchmark-interval-multiply benchmark-minimal-multiply benchmark-isomorphism benchmark-minword
 

diff --git a/benchmarks/bench_equal.py b/benchmarks/bench_equal.py
@@ -17,23 +17,12 @@
 
 import pytest_benchmark
 
-from pytest_benchmark.utils import safe_dumps
-
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
+import freebandlib
 from freebandlib import equal_in_free_band
 
+# Hack to prevent excessive benchmark output
+freebandlib.Transducer.__repr__ = lambda x : ""
+
 samples = []
 path = "benchmarks/samples"
 for x in sorted(os.listdir(path)):

diff --git a/benchmarks/bench_interval.py b/benchmarks/bench_interval.py
@@ -11,23 +11,12 @@
 
 import pytest_benchmark
 
-from pytest_benchmark.utils import safe_dumps
-
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
+import freebandlib
 from freebandlib.transducer import interval_transducer
 
+# Hack to prevent excessive benchmark output
+freebandlib.Transducer.__repr__ = lambda x : ""
+
 samples = []
 path = "benchmarks/samples"
 for x in sorted(os.listdir(path)):

diff --git a/benchmarks/bench_isomorphism.py b/benchmarks/bench_isomorphism.py
@@ -11,23 +11,12 @@
 
 import pytest_benchmark
 
-from pytest_benchmark.utils import safe_dumps
-
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
+import freebandlib
 from freebandlib import transducer_isomorphism
 
+# Hack to prevent excessive benchmark output
+freebandlib.Transducer.__repr__ = lambda x : ""
+
 
 def get_samples(fnam):
     f = gzip.open(fnam, "rb")

diff --git a/benchmarks/bench_minimal_multiply.py b/benchmarks/bench_minimal_multiply.py
@@ -17,23 +17,12 @@
 # First 13 digits of the golden ratio.
 random.seed(1618033988749)
 
-from pytest_benchmark.utils import safe_dumps
-
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
+import freebandlib
 from freebandlib import multiply, transducer_cont
 
+# Hack to prevent excessive benchmark output
+freebandlib.Transducer.__repr__ = lambda x : ""
+
 
 def get_samples(fnam):
     f = gzip.open(fnam, "rb")

diff --git a/benchmarks/bench_minword.py b/benchmarks/bench_minword.py
@@ -11,23 +11,11 @@
 
 import pytest_benchmark
 
-from pytest_benchmark.utils import safe_dumps
-
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
+import freebandlib
 from freebandlib import min_word, transducer_cont
 
+# Hack to prevent excessive benchmark output.
+# __repr__ has to return a str: returning None makes every repr() call on a
+# Transducer raise TypeError, so use the empty string like the other
+# benchmark modules do.
+freebandlib.Transducer.__repr__ = lambda x : ""
 
 def get_samples(fnam):
     f = gzip.open(fnam, "rb")

diff --git a/benchmarks/extract_data.py b/benchmarks/extract_data.py
@@ -0,0 +1,145 @@
+import json
+import os
+
+_output_dir = "./raw_benchmark_data/"
+
+
+def read_benchmarks(bench_name):
+    """Collect (param, mean time) pairs for every saved run of one benchmark.
+
+    The saved JSON files are named "<run number>_<save name>.json" and the
+    Makefile removes them with the glob "raw_benchmark_data/*/*_<name>.json",
+    i.e. they live one directory below ``_output_dir``.  The whole tree under
+    ``_output_dir`` is therefore searched, and a file is selected only when
+    the part of its name after the first underscore is exactly
+    ``bench_name + ".json"`` (so "interval" does not also pick up the
+    "interval_multiply" runs).
+
+    Args:
+        bench_name: the name the benchmark was saved under, e.g. "minimize".
+
+    Returns:
+        A list of ``(benchmark["param"], benchmark["stats"]["mean"])`` tuples,
+        one per benchmark entry found in the selected files.
+
+    Raises:
+        OSError: if ``_output_dir`` (or a directory below it) cannot be listed.
+    """
+
+    def _fail(error):
+        # os.walk ignores listing errors by default; surface them instead so
+        # a missing data directory is not mistaken for "no results".
+        raise error
+
+    wanted = bench_name + ".json"
+    files = []
+    for dirpath, _dirnames, filenames in os.walk(_output_dir, onerror=_fail):
+        for filename in filenames:
+            if filename.partition("_")[2] == wanted:
+                files.append(os.path.join(dirpath, filename))
+    # Directory listing order is unspecified; sort for reproducible output.
+    files.sort()
+    print(files)
+
+    results = []
+    for path in files:
+        with open(path, "r") as in_file:
+            data = json.load(in_file)
+        if "benchmarks" in data:
+            for benchmark in data["benchmarks"]:
+                name = benchmark["param"]
+                mean_time = benchmark["stats"]["mean"]
+                results.append((name, mean_time))
+
+    return results
+
+
+_data_dir = "./processed_benchmark_data/"
+
+
+def write_tuples(bench_name, name1, data):
+    """Write ``data`` to ``<_data_dir>/<bench_name>_<name1>.dat``.
+
+    Each element of ``data`` is written on its own line, with its items
+    converted via ``str`` and separated by single spaces.
+
+    Args:
+        bench_name: benchmark name, first part of the output file name.
+        name1: data-set label, second part of the output file name.
+        data: iterable of tuples (or other iterables) to write.
+    """
+    # The output directory is git-ignored, so it does not exist on a fresh
+    # checkout; create it rather than failing in open().
+    os.makedirs(_data_dir, exist_ok=True)
+    out_path = os.path.join(_data_dir, bench_name + "_" + name1 + ".dat")
+    with open(out_path, "w") as out_file:
+        for t in data:
+            out_file.write(" ".join(map(str, t)) + "\n")
+
+
+# "interval" benchmark: the param string is parsed as
+# "<alphabet size>-<word length>"; rows are (alphabet size, word length, mean).
+bench_name = "interval"
+results = []
+for name, mean_time in read_benchmarks(bench_name):
+    fields = name.split("-")
+    results.append((int(fields[0]), int(fields[1]), mean_time))
+results.sort()
+
+# Group the rows by their first column ...
+results_x = {}
+for x, y, z in results:
+    results_x.setdefault(x, []).append((y, z))
+for rows in results_x.values():
+    rows.sort()
+
+# ... and by their second column.
+results_y = {}
+for x, y, z in results:
+    results_y.setdefault(y, []).append((x, z))
+for rows in results_y.values():
+    rows.sort()
+
+# Mean time against the product of the two columns.
+results_p = sorted((x * y, z) for x, y, z in results)
+
+write_tuples(bench_name, "all", results)
+write_tuples(bench_name, "product", results_p)
+for x, rows in results_x.items():
+    write_tuples(bench_name, "x_" + str(x), rows)
+for y, rows in results_y.items():
+    write_tuples(bench_name, "y_" + str(y), rows)
+
+
+# "minimize" benchmark: the integer before the first "-" of the param string
+# is kept as the single key column; rows are (key, mean time).
+bench_name = "minimize"
+results = sorted(
+    (int(name.split("-")[0]), mean_time)
+    for name, mean_time in read_benchmarks(bench_name)
+)
+write_tuples(bench_name, "all", results)
+
+# "isomorphism" benchmark: the integer before the first "-" of the param
+# string is kept as the single key column; rows are (key, mean time).
+bench_name = "isomorphism"
+results = sorted(
+    (int(name.split("-")[0]), mean_time)
+    for name, mean_time in read_benchmarks(bench_name)
+)
+write_tuples(bench_name, "all", results)
+
+# "minword" benchmark: the first two "-"-separated integers of the param
+# string are the key columns; rows are (first, second, mean time).
+bench_name = "minword"
+results = []
+for name, mean_time in read_benchmarks(bench_name):
+    fields = name.split("-")
+    results.append((int(fields[0]), int(fields[1]), mean_time))
+results.sort()
+write_tuples(bench_name, "all", results)
+# Mean time against the product of the two key columns.
+results_p = sorted((x * y, z) for x, y, z in results)
+write_tuples(bench_name, "product", results_p)
+
+# "minimal_multiply" benchmark: the first three "-"-separated integers of the
+# param string are the key columns; rows are (first, second, third, mean).
+bench_name = "minimal_multiply"
+results = []
+for name, mean_time in read_benchmarks(bench_name):
+    fields = name.split("-")
+    results.append((int(fields[0]), int(fields[1]), int(fields[2]), mean_time))
+results.sort()
+write_tuples(bench_name, "all", results)
+# Written under the "product" label, but the key is first^2 + second + third.
+results_p = sorted((x*x + y + z, w) for x, y, z, w in results)
+write_tuples(bench_name, "product", results_p)
+
+# "interval_multiply" benchmark: the first three "-"-separated integers of the
+# param string are the key columns; rows are (first, second, third, mean).
+bench_name = "interval_multiply"
+results = []
+for name, mean_time in read_benchmarks(bench_name):
+    fields = name.split("-")
+    results.append((int(fields[0]), int(fields[1]), int(fields[2]), mean_time))
+results.sort()
+write_tuples(bench_name, "all", results)
+# Written under the "product" label, but the key is first^2 + second + third.
+results_p = sorted((x*x + y + z, w) for x, y, z, w in results)
+write_tuples(bench_name, "product", results_p)
diff --git a/benchmarks/templates/bench_interval_multiply.py b/benchmarks/templates/bench_interval_multiply.py
@@ -21,23 +21,11 @@
 
 import pytest_benchmark
 
-from pytest_benchmark.utils import safe_dumps
-
+import freebandlib
 from freebandlib import multiply, transducer_cont
 
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
+# Hack to prevent excessive benchmark output
+freebandlib.Transducer.__repr__ = lambda x : ""
 
 def get_samples(fnam):
     f = gzip.open(fnam, "rb")

diff --git a/benchmarks/templates/bench_minimize.py b/benchmarks/templates/bench_minimize.py
@@ -15,25 +15,11 @@
 
 import pytest_benchmark
 
-from pytest_benchmark.utils import safe_dumps
-
+import freebandlib
 from freebandlib import transducer_minimize
 
-
-def __save(self, output_json, save):
-    output_file = self.get("%s_%s.json" % (self._next_num, save))
-    assert not output_file.exists()
-    for x in output_json["benchmarks"]:
-        del x["params"]
-    with output_file.open("wb") as fh:
-        fh.write(safe_dumps(output_json, ensure_ascii=True, indent=4).encode())
-    self.logger.info("Saved benchmark data in: %s" % output_file)
-
-
-pytest_benchmark.storage.file.FileStorage.save = __save
-
-from freebandlib.transducer import interval_transducer
-
+# Hack to prevent excessive benchmark output
+freebandlib.Transducer.__repr__ = lambda x : ""
 
 def get_samples(fnam):
     f = gzip.open(fnam, "rb")